############################################################ Mon Nov 8 14:11:05 PST 1999 Documentation on PIII Linux cluster 3 front-ends 64 nodes # Killing XXX's jobs foreach u (XXX) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end ############################################################ (1) vn???.physics.ubc.ca 2. the IP information relating to this project is: Subnet: 142.103.237.0 Netmask: 255.255.255.0 Gateway: 142.103.237.254 142.103.237.252 vnswitch.physics.ubc.ca 142.103.237.225 vn225.physics.ubc.ca vnf1.physics.ubc.ca 142.103.237.226 vn226.physics.ubc.ca vnf2.physics.ubc.ca 142.103.237.227 vn227.physics.ubc.ca vnf3.physics.ubc.ca 142.103.237.1 vn1.physics.ubc.ca 142.103.237.2 vn2.physics.ubc.ca 142.103.237.3 142.103.237.4 142.103.237.5 142.103.237.6 142.103.237.7 142.103.237.8 142.103.237.9 142.103.237.10 142.103.237.11 142.103.237.12 142.103.237.13 142.103.237.14 142.103.237.15 142.103.237.16 142.103.237.17 142.103.237.18 142.103.237.19 142.103.237.20 142.103.237.21 142.103.237.22 142.103.237.23 142.103.237.24 142.103.237.25 142.103.237.26 142.103.237.27 142.103.237.28 142.103.237.29 142.103.237.30 142.103.237.31 142.103.237.32 142.103.237.33 142.103.237.34 142.103.237.35 142.103.237.36 142.103.237.37 142.103.237.38 142.103.237.39 142.103.237.40 142.103.237.41 142.103.237.42 142.103.237.43 142.103.237.44 142.103.237.45 142.103.237.46 142.103.237.47 142.103.237.48 142.103.237.49 142.103.237.50 142.103.237.51 142.103.237.52 142.103.237.53 142.103.237.54 142.103.237.55 142.103.237.56 142.103.237.57 142.103.237.58 142.103.237.59 142.103.237.60 142.103.237.61 142.103.237.62 142.103.237.63 142.103.237.64 vn64.physics.ubc.ca ============================================================ Tue Nov 9 16:10:17 PST 1999 Setting up cluster switch ============================================================ # Communicating via serial port attached to laptop minicom Console Main Menu 2 Switch Management Access Configuration Time 
Protocol Config [DHCP] Disabled IP Config : Manual IP address: 142.103.237.252 Subnet Mask: 255.255.255.0 Gateway: 142.103.237.254 ============================================================ Fri Nov 12 12:49:49 PST 1999 ============================================================ (1) First vnfe[123] and vn[1-16] up and running, configuring laptop as vnlap.physics.ubc.ca (leave hostname as rar052.ubc.ca) IP address 142.103.237.230 Subnet mask 255.255.255.0 Gateway 142.103.237.254 DNS 142.103.237.1 137.82.1.1 (2) Laptop now connected to cluster, continue with secondary setup of vnfe[123], but first need to add laptop to floppy setup and distribute to vnfe1, ... ============================================================ Fri Nov 12 17:30:39 PST 1999 ============================================================ (1) Cluster now on network, re-setting DNS default domain physics.ubc.ca nameserver 1 137.82.1.1 nameserver 2 137.82.28.3 ============================================================ Fri Nov 12 13:10:31 PST 1999 ============================================================ # As root@vnfe[123] THE FOLLOWING IS NOW DONE VIA THE SETUP SCRIPT cd chsh -s /bin/tcsh mv .tcshrc .tcshrc.O mv .vimrc .vimrc.O #Modified /etc/csh.cshrc as per ../LINUX/README ftp 142.103.237.230 # login as matt cd system/UNIX_SETUP/dist/linux_cshrc get csh.cshrc /etc/csh.cshrc quit # Enable Remote Shell vi /etc/inetd.conf shell stream tcp nowait root /usr/sbin/tcpd in.rshd killall -HUP inetd # Make .rhosts cat > ~/.rhosts bh1 bh2 bh3 bh4 bh5 bh6 laplace bh1.physics.ubc.ca matt bh2.physics.ubc.ca matt bh3.physics.ubc.ca matt bh4.physics.ubc.ca matt bh5.physics.ubc.ca matt bh6.physics.ubc.ca matt laplace.physics.ubc.ca matt END OF STUFF DONE BY STARTUP SCRIPT # Install ssh mkdir -p /var/tmp/install cd !$ ftp vnlap cd system/UNIX_SETUP/dist prompt mget ssh-1.2.27.tar.gz mpich.tar.gz quit tar zxf ssh-1.2.27.tar.gz cd ssh-1.2.27 ./configure --prefix=/usr/local make install cd /etc/rc.d/ ftp 
laplace # login as matt cd /usr2/people/matt/system/UNIX_SETUP/dist/linux_rc.local get rc.local quit /etc/rc.d/rc.local start cd; cd .ssh /usr/local/bin/ssh-keygen # Saved and distributed keys # Goto "as matt" cd cd .ssh cp ~matt/.ssh/authorized_keys . # Installation scripts cd /home/matt/system/UNIX_SETUP/root@bh1.physics.ubc.ca/install scp Install Makefile root@vnfe1.physics.ubc.ca:/usr/tmp/install scp Install Makefile root@vnfe2.physics.ubc.ca:/usr/tmp/install scp Install Makefile root@vnfe3.physics.ubc.ca:/usr/tmp/install # NFS---Typical exportfs entry (different from SGI) ######################################################################## Delay NFS set-up until we get on the network ######################################################################## #----------------------------------------------------------- /etc/fstab #----------------------------------------------------------- #As root@vnfe1 mkdir -p /d/vnfe1 ln -s /home /d/vnfe1/home ln -s /home2 /d/vnfe1/home2 mkdir -p /d/vnfe2/home /d/vnfe2/home2 /d/vnfe3/home /d/vnfe3/home2 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 #As root@vnfe2 mkdir -p /d/vnfe2 ln -s /home /d/vnfe2/home ln -s /home2 /d/vnfe2/home2 mkdir -p /d/vnfe1/home /d/vnfe1/home2 /d/vnfe3/home /d/vnfe3/home2 vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 #As root@vnfe3 mkdir -p /d/vnfe3 ln -s /home /d/vnfe3/home ln -s /home2 /d/vnfe3/home2 mkdir -p /d/vnfe1/home /d/vnfe1/home2 /d/vnfe2/home /d/vnfe2/home2 vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 # Make sure NFS
is enabled (linuxconf) rrvi /usr/local/util/machines/machines.LINUX /etc/exports /home vn*.physics.ubc.ca(rw) bh*.physics.ubc.ca(rw) laplace.physics.ubc.ca(rw) godel.physics.ubc.ca(rw) /home2 vn*.physics.ubc.ca(rw) bh*.physics.ubc.ca(rw) laplace.physics.ubc.ca(rw) godel.physics.ubc.ca(rw) rrcmd 'exportfs -uav; exportfs -av' rrcmd 'mount -a; df' rrvi /etc/fstab ######################################################################## NFS up and apparently working ######################################################################## # MPI including patch to server cdi tar zxf mpich.tar.gz cd mpich ./configure -opt="-O" -nodevdebug --prefix=/usr/local scp matt@vnlap:~/system/UNIX_SETUP/dist/mpich_patch_linux/servers/serv_p4.c /var/tmp/install/mpich/mpid/ch_p4/p4/servers/ scp matt@vnlap:~/system/UNIX_SETUP/dist/mpich_patch_linux/server/serv_p4.c /var/tmp/install/mpich/mpid/server make make install ln -s /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/lib/libmpi.a cp /var/tmp/install/mpich/include/mpidefs.h /usr/local/include vi /usr/local/util/machines/machines.LINUX vnfe1.physics.ubc.ca vnfe2.physics.ubc.ca vnfe3.physics.ubc.ca setenv HOSTNAMES /usr/local/util/machines/machines.LINUX rrcmd '(cd /var/tmp/install/mpich/examples/basic; make)' # MPI TESTING cd /var/tmp/install/mpich/examples/basic mpirun -np 2 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca pi is approximately 3.1416009869231241, Error is 0.0000083333333309 wall clock time = 0.000729 mpirun -np 3 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca Process 2 on vnfe3.physics.ubc.ca pi is approximately 3.1416009869231249, Error is 0.0000083333333318 wall clock time = 0.000848 # Perftest with 1 and 2 rrcmd '(cd /var/tmp/install/mpich/examples/perftest; make)' cd /var/tmp/install/mpich/examples/perftest date; mpirun -np 2 ./mpptest -gnuplot -rate -size 16000 400000 16000 -fname long.gpl; date ********** NEXT 
#----------------------------------------------------------- # RNPL #----------------------------------------------------------- cdi Install rnpl cd rnpl make full #----------------------------------------------------------- # RNPL BENCHMARKS (see matt@laplace:~/system/BENCHMARKS/workstations_varsity) #----------------------------------------------------------- #----------------------------------------------------------- # HDF (for DAGH/GRACE) #----------------------------------------------------------- cd /usr2/people/matt/system/UNIX_SETUP/dist netscape ftp://ftp.ncsa.uiuc.edu/HDF/HDF/HDF4.1r3/tar/HDF4.1r3.tar.gz # As root@bh[5] cdi ssh matt@laplace "cd /usr2/people/matt/system/UNIX_SETUP/dist; zcat HDF4.1r3.tar.Z" | tar xf - cd HDF4.1r3 configure --prefix=/usr/local make make install ------------------------------------------------------------ As matt@vnfe[123] ------------------------------------------------------------ cd; mkdir .ssh cd .ssh /usr/local/bin/ssh-keygen # Distribute keys to vnlap:/home/matt/system/UNIX_SETUP/ssh/authorized_keys ftp vnlap cd /home/matt/system/UNIX_SETUP/ssh/authorized_keys get authorized_keys # Re-install sshd on laptop cd /home/matt/system/UNIX_SETUP/matt@bh1.physics.ubc.ca/matt tar cf - . | ssh matt@vnfe1 'tar xf -' tar cf - . | ssh matt@vnfe2 'tar xf -' tar cf - . 
| ssh matt@vnfe3 'tar xf -' ------------------------------------------------------------ As root@bh6 ------------------------------------------------------------ cdi Install rvs setenv PACK rvs; ./Install $PACK; cd $PACK; make install # This is only necessary due to advocated setting of RNPL_FLIBS setenv PACK rvsso; cdi; ./Install $PACK; cd $PACK; make install setenv PACK vutil; cdi; ./Install $PACK; cd $PACK; make install setenv PACK utilio; cdi; ./Install $PACK; cd $PACK; make install setenv PACK utilmath; cdi; ./Install $PACK; cd $PACK; make install setenv PACK emkgcnad; cdi; ./Install $PACK; cd $PACK; make install NEXT_ACTIONS INSTALL: Java Development Kit/scivis lapack/linpack/odepack/fftpack IBM data explorer ============================================================ Wed Nov 10 18:24:03 PST 1999 ============================================================ (1) Made ~matt /d/bh1/home/matt on bh[123456] Beware permission problems (particularly no-execute on home directories etc.) ******** INCLUDE NUMERIC IP ADDRESSES FOR laplace.physics.ubc.ca godel.physics.ubc.ca bh[123456].physics.ubc.ca rar0502.net.ubc.ca in hosts.allow hosts on vnfe[123] ############################################################ Sat Nov 13 09:39:07 PST 1999 ############################################################ (1) From /usr2/people/matt/system/UNIX_SETUP/cluster/scripts.3/README ============================================================ Fri Nov 12 19:52:34 PST 1999 Scripts for tertiary configuration of nodes (ssh) ============================================================ Need to modify basic set up script to update rc.local cd /etc/rc.d cp rc.local rc.local.O vi rc.local if [ -f /usr/local/sbin/sshd ]; then /usr/local/sbin/sshd & fi /etc/rc.d/rc.local start ============================================================ (1) do_setup ftp laplace matt cd /usr2/people/matt/system/UNIX_SETUP/cluster/scripts.3 get do_setup quit sh do_setup /root # Accumulate authorized_keys in ...
(2) Problem with 'vn8' *** Can't find server name for address 142.103.237.1: No response from server *** Can't find server name for address 137.82.1.1: No response from server *** Default servers are not available No router --- 142.103.237.254 route add 142.103.237.254 doesn't work # Enabling -x in /etc/sysconfig/network-scripts/ifup vn[78] (link to # /sbin/ifup) # Putting tracing in /sbin/ifup echo "ifup: IPX = <$IPX>" echo "ifup: CONFIG = <$CONFIG>" echo "ifup: DEVICE = <$DEVICE>" # Putting tracing in ifcfg-eth0 # Examination of ifcfg-eth0 suggests possible solution #BEFORE DEVICE="eth0" IPADDR="142.103.237.8" NETMASK="255.255.255.0" ONBOOT="yes" BOOTPROTO="none" IPXNETNUM_802_2="" IPXPRIMARY_802_2="no" IPXACTIVE_802_2="no" IPXNETNUM_802_3="" IPXPRIMARY_802_3="no" IPXACTIVE_802_3="no" IPXNETNUM_ETHERII="" IPXPRIMARY_ETHERII="no" IPXACTIVE_ETHERII="no" IPXNETNUM_SNAP="" IPXPRIMARY_SNAP="no" IPXACTIVE_SNAP="no" #AFTER Manual route add default gw 142.103.237.254 eth0 works, could add to rc.local, but need to figure out what's going on hostname -s doesn't work on vn8 (3) # As matt@laplace.physics.ubc.ca cd .ssh foreach i (`iota 16`) scp authorized_keys root@142.103.237.${i}:~/.ssh end (4) Possible problem with vn5, vn6, ... Don't prompt for password without authorized_keys (using .rhosts) ########################################################################### NFS ########################################################################### (5) Implemented some "vn" scripts for "remote" execution. Still something wrong with vn8 cds; vnRemote vnTest . . .
vn6.physics.ubc.ca -- Sat Nov 13 10:24:30 PST 1999 + scp vnTest root@142.103.237.7:/tmp + ssh root@142.103.237.7 /tmp/vnTest vn7.physics.ubc.ca -- Sat Nov 13 10:23:15 PST 1999 + scp vnTest root@142.103.237.8:/tmp + ssh root@142.103.237.8 /tmp/vnTest /usr/X11R6/bin/xauth: (stdin):1: bad display name "vn8.physics.ubc.ca:10.0" in "add" command vn8.physics.ubc.ca -- Sat Nov 13 10:23:36 PST 1999 + scp vnTest root@142.103.237.9:/tmp + ssh root@142.103.237.9 /tmp/vnTest vn9.physics.ubc.ca -- Sat Nov 13 10:22:27 PST 1999 + scp vnTest root@142.103.237.10:/tmp + ssh root@142.103.237.10 /tmp/vnTest vn10.physics.ubc.ca -- Sat Nov 13 11:21:49 PST 1999 + scp vnTest root@142.103.237.11:/tmp + ssh root@142.103.237.11 /tmp/vnTest . . . vnNFSsetup worked like a charm---modulo Xauthority problem on vn8 ############################################################ Sat Nov 13 13:28:41 PST 1999 ############################################################ (1) vn8 problem may be due to improper network configuration, in particular primary name+domain was not set Missing 142.103.237.8 vn8.physics.ubc.ca vn7 142.103.237.9 vn9.physics.ubc.ca vn7 in /etc/hosts **** vn8 problem apparently fixed RNPL, utilities, start setting up user accounts ############################################################ Sat Nov 13 15:39:27 PST 1999 ############################################################ (1) Setting up matt accounts on all machines via vnRemote vnSetupMatt # vnSetupMatt # Didn't work !! 
cd .ssh foreach i (`iota 16`) (ssh matt@142.103.237.$i "cat ~/.ssh/identity.pub") >> authorized_keys_matt_vn end # Problem with vn4 # Problem with vn1 (bh1) ############################################################ DO future installation from /usr2/people/matt/system/UNIX_SETUP/matt@vn.physics.ubc.ca/ ############################################################ ############################################################ Sun Nov 14 06:34:40 PST 1999 ############################################################ (0) vnNewUsers Make sure a+rx permission is set on home directories Current procedure somewhat awkward Mastering/mirroring in matt@laplace:/usr2/people/matt/system/vn/image/master/etc Actual creation from matt@vnfe1:/home/matt/system/vn/accounts PROCEDURE (1) # As matt@laplace cd /usr2/people/matt/system/vn/image/master/etc make import (2) # As matt@laplace cd /usr2/people/matt/system/vn/accounts # Make account file foo:Phys^98:690:600:Foo bar:/d/vnfe1/home/foo:/bin/tcsh bar:Phys^98:691:600:Bar barella:/d/vnfe2/home/bar:/bin/tcsh (3) # As matt@vnfe1 cd /home/matt/system make update_vn cd /home/matt/system/vn/accounts vnNewUsers SEE HOWTO FOR CURRENT "STREAMLINED" PROCEDURE (1) HOMEMWC set to /home/matt instead of /d/vnfe1/home/matt Keep master .?? 
files for root in /usr2/people/matt/system/vn/image/master/etc distribute via vnDistEtc Mirror in /usr2/people/matt/system/UNIX_SETUP/cluster/scripts (2) SOFTWARE INSTALLATION # Note: Encountered difficulties "Install"ing via ssh and tar, # modified Install to work directly from ~matt on vnfe1; # must copy .Z files from matt@laplace to matt@vnfe1 prior to # install MPI (finish and test later) RNPL rnpl vnallCommand "cdi; Install rnpl" MWC_UTIL rvs vnallCommand "cdi; Install rvs" vnallCommand "cd /usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallCommand "cdi; Install vutil" vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio OK utilmath vnallCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt /usr/local/bin/femkgcnad || echo Deleted" # Difficulty may be with 'remote tar'---try explicit copy of file from # vnfe1 # Modified and distributed 'Install' (3) /etc/csh.cshrc BE VERY CAREFUL MODIFYING THIS FILE! 
#ADDED setenv TMP /tmp/${USER} test -d $TMP || mkdir $TMP setenv RNPL_RNPL "/usr/local/bin/rnpl" setenv RNPL_F77 "f77 -O2" setenv RNPL_F77LOAD "f77 -O2 -L/home/matt/lib" setenv RNPL_F77PP "touch" setenv RNPL_FLIBS "-lrnpl -lvsso" setenv CFLAGS "-O2" setenv F77FLAGS "-O2" setenv HOSTNAMES /usr/local/util/machines/machines.LINUX TODO --- WEB SITE WITH BRIEF OPERATING INSTRUCTIONS (done) --- BANNERS --- SCIENTIFIC SOFTWARE ############################################################ Sun Nov 14 17:02:55 PST 1999 ############################################################ (1) # Distribution status # Mastering system/vn # on vn and system/UNIX_SETUP/cluster # on laplace. # Change system/vn -> update matt@laplace:~/system/vn cd system make export_vn # matt@laplace:~/system/vn -> matt@laplace:~/system/UNIX_SETUP/cluster/scripts cd /usr2/people/matt/system/vn/image/master/etc make export (1a) NOTE ***: Need distinct system/Makefiles on distinct systems, time to start using CVS! Aliased # Master as matt@vnfe1 etc -> cd's to /home/matt/system/vn/image/master/etc exetc -> etc and 'make export' vn -> cd's to system/vn exvn -> cd system and 'make update_vn' ############################################################ (2) # To add user (with plain text password) ssh matt@vnfe1.physics.ubc.ca ssh matt@142.103.237.225 nu # Make new user file vnNewUsers # Mirror master info on laplace/floppy cd system make update_vn # To change passwd---as matt@vnfe1 # Unprotect/edit/protect shadow then distribute.
etc sola vs vnDistEtc shadow ############################################################ Current password hack root@vnfe1:/usr/bin/passwd is a script which invokes real passwd /usr/bin/passwd.real, then distributes via vnDistEtc ############################################################ (3) # As root@vnfe1 cd /usr/bin cp mv passwd passwd.real cat<<END > /usr/bin/passwd #!/bin/sh passwd.real $* trap '' 1 2 15 echo "Please be patient while the network passwd files are updated" (ssh matt@142.103.237.225 "cd /home/matt/system/vn/image/master/etc; make import" 2>&1 /dev/null) > /dev/null /d/vnfe1/home/matt/scripts/vnDistEtc shadow 2>&1 > /dev/null END chmod u+xs /usr/bin/passwd ############################################################ Defeating Password aging matt:$1$6y4t41/.$rSBbYDbLXHomm3qQe4Ujt1:10910:0:99999:7::: ehonda:tK.CuDBGxcU9U:10910:0:99999:7::: man 5 shadow shadow contains the encrypted password information for user's accounts and optional the password aging information. Included is Login name Encrypted password Days since Jan 1, 1970 that password was last changed Days before password may be changed Days after which password must be changed Days before password is to expire that user is warned Days after password expires that account is disabled Days since Jan 1, 1970 that account is disabled A reserved field The password field must be filled. The encryped password consists of 13 to 24 characters from the 64 character alphabet a thru z, A thru Z, 0 thru 9, . and /. Refer to crypt(3) for details on how this string is interpreted. The date of the last password change is given as the number of days since Jan 1, 1970. The password may not be changed again until the proper number of days have passed, and must be changed after the maximum number of days. If the minimum number of days required is greater than the maximum number of day allowed, this password may not be changed by the user.
############################################################ Mon Nov 15 07:33:54 PST 1999 ############################################################ (1) Updating secondary set-up floppy. matt@rar0502:/home/matt/system/UNIX_SETUP/cluster/ cd scripts # Change 'matt's home directory to /home/matt (temporarily) # Actually, safer *NOT* to install passwd/shadow in this manner # Will leave do_setup as is ############################################################ Mon Nov 15 10:23:14 PST 1999 ############################################################ (1) Installing gmp as per roman's request # As root@bh1 cdi scp gmp-2.0.2.tar.gz matt@142.103.237.225:~/autoconf/ # Master Install for vn /home/matt/system/vn/image/master/install/Install # Make duplicate version for g-zipped archives /home/matt/system/vn/image/master/install/Installz make export (2) vnallCommand 'cdi; ./Installz gmp-2.0.2' vnallCommand 'ls -lt /usr/local/lib/libgmp.a' ############################################################ Tue Nov 16 11:03:45 PST 1999 Scientific Software ############################################################ (1) Create configurable versions of linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' lapack fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' # Master on matt@laplace:~/autoconf ############################################################ Tue Nov 16 19:14:48 PST 1999 ############################################################ Something wrong with vn1 SEE README.CRASH ############################################################ Tue Nov 16 23:34:50 PST 1999 ############################################################ (1) More MPI setup. Wrote vnMakeMPIMachines generates machines.vnfe1 machines.vnfe2 machines.vnfe3 machines.vn1 . . . 
machines.vn16 and scp's to /usr/local/util/machines/machines.LINUX # As root@vnfe1 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 19 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca Process 2 on vnfe3.physics.ubc.ca Process 5 on vn3.physics.ubc.ca Process 3 on vn1.physics.ubc.ca Process 8 on vn6.physics.ubc.ca Process 4 on vn2.physics.ubc.ca Process 7 on vn5.physics.ubc.ca Process 6 on vn4.physics.ubc.ca Process 10 on vn8.physics.ubc.ca Process 9 on vn7.physics.ubc.ca Process 11 on vn9.physics.ubc.ca Process 12 on vn10.physics.ubc.ca Process 14 on vn12.physics.ubc.ca Process 13 on vn11.physics.ubc.ca Process 15 on vn13.physics.ubc.ca Process 16 on vn14.physics.ubc.ca Process 17 on vn15.physics.ubc.ca Process 18 on vn16.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 wall clock time = 0.209264 # As root@vn16 Process 0 on vn16.physics.ubc.ca Process 2 on vn2.physics.ubc.ca Process 10 on vn10.physics.ubc.ca Process 1 on vn1.physics.ubc.ca Process 4 on vn4.physics.ubc.ca Process 5 on vn5.physics.ubc.ca Process 3 on vn3.physics.ubc.ca Process 8 on vn8.physics.ubc.ca Process 7 on vn7.physics.ubc.ca Process 6 on vn6.physics.ubc.ca Process 9 on vn9.physics.ubc.ca Process 12 on vn12.physics.ubc.ca Process 11 on vn11.physics.ubc.ca Process 13 on vn13.physics.ubc.ca Process 18 on vnfe3.physics.ubc.ca Process 14 on vn14.physics.ubc.ca Process 15 on vn15.physics.ubc.ca Process 16 on vnfe1.physics.ubc.ca Process 17 on vnfe2.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 wall clock time = 0.006312 ############################################################ Wed Nov 17 15:45:25 PST 1999 ############################################################ (1) vn17-vn48 set up in a pretty straightforward fashion. 3 nodes had BIOS set to require keyboard for boot. Remedied, and had to properly seat a couple of ethernet cables. Apart from that pretty uneventful. 
(2) Wrote vnSshInstall to install ssh, ending with root's identity.pub which will continue to be accumulated in matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys cd /tmp cat > vnSshInstall chmod a+x vnSshInstall ./vnSshInstall telnet vn17 telnet vn18 telnet vn19 telnet vn20 telnet vn21 telnet vn22 telnet vn23 telnet vn24 telnet vn25 telnet vn26 telnet vn27 telnet vn28 telnet vn29 telnet vn30 telnet vn31 telnet vn32 telnet vn33 telnet vn34 telnet vn35 telnet vn36 telnet vn37 telnet vn38 telnet vn39 telnet vn40 telnet vn41 telnet vn42 telnet vn43 telnet vn44 telnet vn45 telnet vn46 telnet vn47 telnet vn48 # ssh set up # Check /usr/local/sbin/sshd on # vn18, vn19, vn20, vn21 # FOUND BUG IN do_setup which had .cshrc -> .aliases etc. # Secondary floppy apparently didn't get executed on # vn18 --- bad rc.local cd /tmp /usr/local/bin/scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/UNIX_SETUP/cluster/scripts_as_matt.tar.gz . tar xfz scripts_as_matt.tar.gz cd scripts_as_matt sh do_setup /root /etc/rc.d/rc.local start # . files bad .cshrc .rhosts .aliases .exrc cd /root/.ssh scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/.ssh/authorized_keys . # vn19 --- bad rc.local cd /root /bin/rm -f .aliases .rhosts .exrc cd /tmp/scripts_as_matt cp .aliases .rhosts .exrc /root cd /root/.ssh /usr/local/bin/scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/.ssh/authorized_keys . # vn20 --- bad rc.local # vn21 --- bad rc.local /etc/rc.d/rc.local start As matt@vnfe1 cd .ssh foreach i (`ifor 48 1 -1`) scp authorized_keys root@vn${i}:~/.ssh end ############################################################ Wed Nov 17 15:45:25 PST 1999 ############################################################ (1) fstab and NFS # temp.
modified vnCommand, vnRemote to use 'vnnewN' -> 17-48 ssh root@vnfe1 "cat /d/vnfe1/home/matt/system/vn/image/master/etc/fstab_frag" vnRemote vnNFSsetup (2) Matt setup vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Update master_authorized_keys (3) /etc setup etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow ############################################################ Thu Nov 18 19:45:23 PST 1999 ############################################################ (1) vnfe1 ran out of processes (too many defunct), had to be manually reset 1316 ? Z 0:00 [kwmsound ] ############################################################ Thu Nov 18 20:24:57 PST 1999 ############################################################ (1) vn18 down Took 18 out of vnN test $i = 18 || printf "142.103.237.%d " $i >>> Executing as root@142.103.237.225 >>> Executing as root@142.103.237.226 564 ? Z 0:15 [mysqld ] >>> Executing as root@142.103.237.227 >>> Executing as root@142.103.237.1 >>> Executing as root@142.103.237.2 >>> Executing as root@142.103.237.3 491 ? Z 0:00 [qmgr ] 544 ? Z 0:14 [mysqld ] 601 ? Z 0:13 [actived ] 602 ? Z 0:05 [innd ] 646 ? Z 0:01 [prefdm ] 750 ? Z 0:12 [rwhod ] 16351 ? Z 0:00 [tcsh ] 16364 ? Z 0:00 [cactus_linux_mp ] 23975 ? Z 0:00 [pickup ] 24023 ? Z 0:00 [tcsh ] 24034 ? Z 0:00 [wave ] 24141 ? Z 0:00 [ssh1 ] >>> Executing as root@142.103.237.4 >>> Executing as root@142.103.237.5 290 ? Z 0:10 [syslogd ] 300 ? Z 0:07 [klogd ] 486 ? Z 0:10 [master ] 494 ? Z 0:01 [qmgr ] 509 ? Z 0:29 [gpm ] 544 ? Z 0:12 [mysqld ] 601 ? Z 0:12 [actived ] 752 ? Z 0:13 [rwhod ] 15705 ? Z 0:00 [tcsh ] 15718 ? Z 0:00 [cactus_linux_mp ] 15727 ? Z 0:00 [tcsh ] 15739 ? Z 0:00 [cactus_linux_mp ] 22332 ? Z 0:00 [pickup ] 22380 ? Z 0:00 [tcsh ] 22390 ? Z 16:00 [wave ] 22391 ? Z 0:00 [wave ] 22416 ? Z 0:00 [tcsh ] 22431 ? Z 0:00 [tcsh ] 22447 ? Z 0:00 [anacron ] 22482 ? Z 0:00 [tcsh ] 22489 ? Z 0:00 [tcsh ] 22506 ? Z 0:00 [tcsh ] 22516 ? 
Z 0:00 [anacron ] 22548 ? Z 0:00 [tcsh ] 22562 ? Z 0:00 [tcsh ] 22578 ? Z 0:00 [tcsh ] 22584 ? Z 0:00 [ssh1 ] 22597 ? Z 0:00 [tcsh ] 22612 ? Z 0:00 [tcsh ] 22636 ? Z 0:00 [tcsh ] 22652 ? Z 0:00 [tcsh ] 22665 ? Z 0:00 [anacron ] 22700 ? Z 0:00 [tcsh ] 22718 ? Z 0:00 [tcsh ] 22734 ? Z 0:00 [tcsh ] 22749 ? Z 0:00 [tcsh ] 22767 ? Z 0:00 [tcsh ] 22785 ? Z 0:00 [tcsh ] 22801 ? Z 0:00 [tcsh ] 22818 ? Z 0:00 [tcsh ] 22833 ? Z 0:00 [tcsh ] 22848 ? Z 0:00 [tcsh ] 22863 ? Z 0:00 [tcsh ] 22878 ? Z 0:00 [tcsh ] 22893 ? Z 0:00 [tcsh ] 22906 ? Z 0:00 [tcsh ] 22921 ? Z 0:00 [tcsh ] 22936 ? Z 0:00 [tcsh ] 22948 ? Z 0:00 [anacron ] 22980 ? Z 0:00 [tcsh ] 22995 ? Z 0:00 [tcsh ] 23019 ? Z 0:00 [tcsh ] 23035 ? Z 0:00 [tcsh ] 23050 ? Z 0:00 [tcsh ] 23064 ? Z 0:00 [anacron ] 23101 ? Z 0:00 [tcsh ] 23121 ? Z 0:00 [anacron ] 23158 ? Z 0:00 [tcsh ] 23174 ? Z 0:00 [tcsh ] 23192 ? Z 0:00 [tcsh ] 23211 ? Z 0:00 [tcsh ] >>> Executing as root@142.103.237.6 >>> Executing as root@142.103.237.7 561 ? Z 0:09 [mysqld ] >>> Executing as root@142.103.237.8 560 ? 
Z 0:08 [mysqld ] >>> Executing as root@142.103.237.9 >>> Executing as root@142.103.237.10 >>> Executing as root@142.103.237.11 >>> Executing as root@142.103.237.12 >>> Executing as root@142.103.237.13 >>> Executing as root@142.103.237.14 >>> Executing as root@142.103.237.15 >>> Executing as root@142.103.237.16 >>> Executing as root@142.103.237.17 >>> Executing as root@142.103.237.19 >>> Executing as root@142.103.237.20 >>> Executing as root@142.103.237.21 >>> Executing as root@142.103.237.22 >>> Executing as root@142.103.237.23 >>> Executing as root@142.103.237.24 >>> Executing as root@142.103.237.25 >>> Executing as root@142.103.237.26 >>> Executing as root@142.103.237.27 >>> Executing as root@142.103.237.28 >>> Executing as root@142.103.237.29 >>> Executing as root@142.103.237.30 >>> Executing as root@142.103.237.31 >>> Executing as root@142.103.237.32 >>> Executing as root@142.103.237.33 >>> Executing as root@142.103.237.34 >>> Executing as root@142.103.237.35 >>> Executing as root@142.103.237.36 >>> Executing as root@142.103.237.37 >>> Executing as root@142.103.237.38 >>> Executing as root@142.103.237.39 >>> Executing as root@142.103.237.40 >>> Executing as root@142.103.237.41 >>> Executing as root@142.103.237.42 >>> Executing as root@142.103.237.43 >>> Executing as root@142.103.237.44 >>> Executing as root@142.103.237.45 >>> Executing as root@142.103.237.46 >>> Executing as root@142.103.237.47 >>> Executing as root@142.103.237.48 ############################################################ # vn3, vn5 need to be rebooted ############################################################ Fri Nov 19 08:11:01 PST 1999 ############################################################ Attaching video to vn18 Can't get any video out vn18 DEAD ############################################################ Fri Nov 19 08:42:43 PST 1999 Still have problems with vn18 ... leaving for now. 
Fri Nov 19 08:52:34 PST 1999 vnallCommand 'jj defunct | grep -v "jj defunct"' > /tmp/DEFUNCT9 >>> Executing as root@142.103.237.225 >>> Executing as root@142.103.237.226 564 ? Z 0:15 [mysqld ] >>> Executing as root@142.103.237.227 >>> Executing as root@142.103.237.1 >>> Executing as root@142.103.237.2 >>> Executing as root@142.103.237.3 >>> Executing as root@142.103.237.4 >>> Executing as root@142.103.237.5 >>> Executing as root@142.103.237.6 >>> Executing as root@142.103.237.7 561 ? Z 0:09 [mysqld ] >>> Executing as root@142.103.237.8 560 ? Z 0:08 [mysqld ] >>> Executing as root@142.103.237.9 >>> Executing as root@142.103.237.10 >>> Executing as root@142.103.237.11 >>> Executing as root@142.103.237.12 >>> Executing as root@142.103.237.13 >>> Executing as root@142.103.237.14 >>> Executing as root@142.103.237.15 >>> Executing as root@142.103.237.16 >>> Executing as root@142.103.237.17 >>> Executing as root@142.103.237.19 >>> Executing as root@142.103.237.20 >>> Executing as root@142.103.237.21 >>> Executing as root@142.103.237.22 >>> Executing as root@142.103.237.23 >>> Executing as root@142.103.237.24 >>> Executing as root@142.103.237.25 >>> Executing as root@142.103.237.26 >>> Executing as root@142.103.237.27 >>> Executing as root@142.103.237.28 >>> Executing as root@142.103.237.29 >>> Executing as root@142.103.237.30 >>> Executing as root@142.103.237.31 >>> Executing as root@142.103.237.32 >>> Executing as root@142.103.237.33 >>> Executing as root@142.103.237.34 >>> Executing as root@142.103.237.35 >>> Executing as root@142.103.237.36 >>> Executing as root@142.103.237.37 >>> Executing as root@142.103.237.38 >>> Executing as root@142.103.237.39 >>> Executing as root@142.103.237.40 >>> Executing as root@142.103.237.41 >>> Executing as root@142.103.237.42 >>> Executing as root@142.103.237.43 >>> Executing as root@142.103.237.44 >>> Executing as root@142.103.237.45 >>> Executing as root@142.103.237.46 >>> Executing as root@142.103.237.47 >>> Executing as 
root@142.103.237.48 ############################################################ Fri Nov 19 10:28:37 PST 1999 ############################################################ Preliminary performance via wave3d/grace 65x65x65 grid: ============= # procs Wall clock time -------- --------- 1 59 sec 2 34 sec 4 18 sec 8 56 sec 16 173 sec 129x129x129 grid: ================ # procs Wall clock time -------- --------- 1 484 sec 2 265 sec 4 146 sec 8 181 sec 16 244 sec ============================================================ Fri Nov 19 11:12:15 PST 1999 ============================================================ (1) vn18 down Fri Nov 19 11:11:53 PST 1999 Put vn18's hard drive into vn48; vn18 now up, vn48 down. ============================================================ Fri Nov 19 11:51:34 PST 1999 Continuing with configuration of vn17-vn47 ============================================================ **** NEED TO GET LAPACK CONFIGURABLE AND INSTALLED (1) Modifying vnMPIinstall to install from matt@vnfe1:~/autoconf ###################################################################### CRASH_5 ###################################################################### Fri Nov 19 12:07:56 PST 1999 ############################################################ (1) vn44 down Probably time to take all machines down and check the BIOS. 
Fri Nov 19 14:26:00 PST 1999 BIOS was incorrect fixed, restarted, OK ###################################################################### Fri Nov 19 15:27:47 PST 1999 ############################################################ vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall HOMEMWC=/d/vnfe1/home/matt in scripts vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall > /tmp/MPIINSTALL_17_47 :n /tmp/MPIINSTALL_17_47 /d/vnfe1/home/matt/autoconf/mpich_patch_linux/servers/serv_p4.c not found server -> servers sleep 1000; vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall > /tmp/MPIINSTALL_17_47_2 vnMakeMPIMachines 1 47 vnallbgCommand 'cd /usr/tmp/install/mpich/examples/basic/; make' #As root@vn1 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 47 cpi # Seems to work sh mpirun -np 50 cpi 2&>1 > /tmp/MPI50 [root@vn1]# mpirun -np 50 cpi exit vi typescript Process 0 on vn1.physics.ubc.ca Process 1 on vn2.physics.ubc.ca Process 2 on vn3.physics.ubc.ca Process 3 on vn4.physics.ubc.ca Process 5 on vn6.physics.ubc.ca Process 4 on vn5.physics.ubc.ca Process 9 on vn10.physics.ubc.ca Process 6 on vn7.physics.ubc.ca Process 10 on vn11.physics.ubc.ca Process 34 on vn35.physics.ubc.ca Process 32 on vn33.physics.ubc.ca Process 12 on vn13.physics.ubc.ca Process 22 on vn23.physics.ubc.ca Process 30 on vn31.physics.ubc.ca Process 33 on vn34.physics.ubc.ca Process 26 on vn27.physics.ubc.ca Process 29 on vn30.physics.ubc.ca Process 18 on vn19.physics.ubc.ca Process 17 on vn18.physics.ubc.ca Process 16 on vn17.physics.ubc.ca Process 24 on vn25.physics.ubc.ca Process 31 on vn32.physics.ubc.ca Process 20 on vn21.physics.ubc.ca Process 28 on vn29.physics.ubc.ca Process 21 on vn22.physics.ubc.ca Process 19 on vn20.physics.ubc.ca Process 25 on vn26.physics.ubc.ca Process 14 on vn15.physics.ubc.ca Process 27 on vn28.physics.ubc.ca Process 23 on vn24.physics.ubc.ca Process 15 on vn16.physics.ubc.ca Process 13 on vn14.physics.ubc.ca Process 8 on vn9.physics.ubc.ca Process 11 on 
vn12.physics.ubc.ca Process 7 on vn8.physics.ubc.ca Process 36 on vn37.physics.ubc.ca Process 38 on vn39.physics.ubc.ca Process 37 on vn38.physics.ubc.ca Process 35 on vn36.physics.ubc.ca Process 40 on vn41.physics.ubc.ca Process 41 on vn42.physics.ubc.ca Process 39 on vn40.physics.ubc.ca Process 42 on vn43.physics.ubc.ca Process 45 on vn46.physics.ubc.ca Process 44 on vn45.physics.ubc.ca Process 43 on vn44.physics.ubc.ca Process 46 on vn47.physics.ubc.ca Process 47 on vnfe1.physics.ubc.ca Process 48 on vnfe2.physics.ubc.ca Process 49 on vnfe3.physics.ubc.ca pi is approximately 3.1416009869231249, Error is 0.0000083333333318 wall clock time = 0.018363 # Distribute Install/Installz vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install* /var/tmp/install; cd /var/tmp/install; ls' RNPL rnpl vnallbgCommand "cdi; Install rnpl" vnallCommand "cd /usr/local/lib; strings librnpl.a" > /tmp/rnpl vi /tmp/rnpl MWC_UTIL rvs vnallbgCommand "cdi; Install rvs" vnallCommand "cd /usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallbgCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallbgCommand "cdi; Install vutil" vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallbgCommand "cdi; Install utilio" vnallbgCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio OK utilmath vnallbgCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallbgCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt 
/usr/local/bin/femkgcnad || echo Deleted" OK linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' lapack fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' # NEED TO DO LAPACK vnallbgCommand 'cdi; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd netlib_lapack; setenv FFLAGS "-O3"; make blaslib; make lapacklib; /bin/cp /usr/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/bin; /bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/libblas.a' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/liblapack.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/liblapack.a' ############################################################ Fri Nov 19 20:01:33 PST 1999 ############################################################ (1) Shutting down whole system and checking/setting BIOSes Plan Unmount NFS mounts on vnfe1, vnfe2, vnfe3 Take nodes down Take front-ends down Fri Nov 19 22:29:07 PST 1999 Changed all BIOS settings (we hope) to reboot on AC restart, ignore all errors on reboot. 
FOUND 3 DEFECTIVE NODES FOR TOTAL OF FOUR --- two short on memory, one only detects one CPU --- currently have 1-44 vn4 did not come up, BIOS had stop on all errors rather than no errors ############################################################ Fri Nov 19 08:08:24 PST 1999 ############################################################ (1) Disconnected vn44 and sent back to Varsity with other "defective" nodes vnMakeMPIMachines 1 43 ############################################################ Fri Nov 19 15:41:14 PST 1999 ############################################################ (1) vn38 down (surprise!!), make vn43 -> vn38, vnN -> 42 Fri Nov 19 16:04:52 PST 1999 Accomplished swap via resetting of IP number rather than disk swap, but then need to re-initialize .ssh on vn38 (formerly vn43) DONE ############################################################ Fri Nov 19 22:12:31 PST 1999 ############################################################ Happy cluster ... knock on wood. time ssh root@vn42 'mpirun -np 45 /var/tmp/install/mpich/examples/basic/cpi' . . . Process 14 on vn14.physics.ubc.ca Process 15 on vn15.physics.ubc.ca Process 8 on vn8.physics.ubc.ca Process 7 on vn7.physics.ubc.ca Process 4 on vn4.physics.ubc.ca Process 6 on vn6.physics.ubc.ca Process 44 on vnfe3.physics.ubc.ca pi is approximately 3.1416009869231249, Error is 0.0000083333333318 wall clock time = 0.010567 0.050u 0.020s 1:46.04 0.0% 0+0k 0+0io 640pf+0w So, to a good first approximation, 'cpi' wall clock time is measured in 10K sec units (! :-)) ############################################################ Fri Nov 19 22:45:50 PST 1999 ############################################################ (1) Wanted to use top in non-interactive mode, options are in there promisingly enough, e.g. 
top -n 1 -b # but ssh vn7 "top -n 1 -b" # craps out # top: tcgetattr() failed: Invalid argument ############################################################ Sat Nov 20 17:59:57 PST 1999 ############################################################ (1) Testing mpi0 as matt@vnfe1 MPI installation doesn't install mpif.h in /usr/local/include test -f /var/tmp/install/mpich/include/mpif.h && cp /var/tmp/install/mpich/include/mpif.h /usr/local/include vnallCommand "test -f /var/tmp/install/mpich/include/mpif.h && cp /var/tmp/install/mpich/include/mpif.h /usr/local/include" vnallCommand "ls /usr/local/include/mpif.h" # Modified vnMPIinstall # Modified matt@vnfe1:~/.aliases alias mpiset 'setenv MPI_DIR `pwdcan`; echo $MPI_DIR' [matt@vn1 mpi0]$ mpirun -np 16 Hostnames Hostnames: vn1.physics.ubc.ca [0:16] Hostnames: vn2.physics.ubc.ca [1:16] Hostnames: vn5.physics.ubc.ca [4:16] Hostnames: vn3.physics.ubc.ca [2:16] Hostnames: vn11.physics.ubc.ca [10:16] Hostnames: vn12.physics.ubc.ca [11:16] Hostnames: vn7.physics.ubc.ca [6:16] Hostnames: vn13.physics.ubc.ca [12:16] Hostnames: vn4.physics.ubc.ca [3:16] Hostnames: vn15.physics.ubc.ca [14:16] Hostnames: vn6.physics.ubc.ca [5:16] Hostnames: vn14.physics.ubc.ca [13:16] Hostnames: vn16.physics.ubc.ca [15:16] Hostnames: vn8.physics.ubc.ca [7:16] Hostnames: vn10.physics.ubc.ca [9:16] Hostnames: vn9.physics.ubc.ca [8:16] # BUT ... 'mpirun -np 32 Hostnames' ... took nearly 1 minute of CPU time # per processor? # Compiled and tested prdump ... 
seems to work OK ############################################################ Sun Nov 21 05:59:01 PST 1999 ############################################################ (1) Updated vutil to include 'dvmesh' executable, tested and verified on bh[123456], installing on cluster vnallbgCommand 'cdi; Installz vutil; cd /tmp; rehash; dvmesh 0 1 101' ############################################################ Sun Nov 21 14:17:20 PST 1999 ############################################################ (1) Running switch applet via vnfe1. Password-protected the switch, added laplace as authorized host and now, of course, cannot access from vnfe1. MAY be able to access via laplace, but may also have to attach the laptop via the serial port. Will not be able to regain manager-level control remotely since there's a "bug/feature" that if one uses the same password for both levels (legitimate in my mind for RO/RW distinction), one can only get in as operator. However, perusal of documentation suggests that passwords can be cleared by pressing the 'clear' button on front of switch. ############################################################ Tue Nov 23 11:40:21 PST 1999 ############################################################ (1) Adding vn43 ... vn60 vn43 ... vn48 already connected, powered up and all came up # Partial output from 'vnallCommand date' >>> Executing as root@142.103.237.41 Tue Nov 23 11:42:47 PST 1999 >>> Executing as root@142.103.237.42 Tue Nov 23 11:42:44 PST 1999 >>> Executing as root@142.103.237.43 Secure connection to 142.103.237.43 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 11:47:26 PST 1999 >>> Executing as root@142.103.237.44 Secure connection to 142.103.237.44 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. 
Tue Nov 23 11:48:54 PST 1999 >>> Executing as root@142.103.237.45 Tue Nov 23 11:54:00 PST 1999 >>> Executing as root@142.103.237.46 Tue Nov 23 11:45:47 PST 1999 >>> Executing as root@142.103.237.47 Tue Nov 23 11:50:39 PST 1999 >>> Executing as root@142.103.237.48 Secure connection to 142.103.237.48 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. (2) Connecting vn49 ... vn60. All power on, but vn59'fan is not running vnallCommand date >>> Executing as root@142.103.237.225 Tue Nov 23 12:24:21 PST 1999 >>> Executing as root@142.103.237.226 Tue Nov 23 12:24:19 PST 1999 >>> Executing as root@142.103.237.227 Tue Nov 23 12:24:16 PST 1999 >>> Executing as root@142.103.237.1 Tue Nov 23 12:24:21 PST 1999 >>> Executing as root@142.103.237.2 Tue Nov 23 12:24:25 PST 1999 >>> Executing as root@142.103.237.3 Tue Nov 23 12:24:27 PST 1999 >>> Executing as root@142.103.237.4 Tue Nov 23 12:24:30 PST 1999 >>> Executing as root@142.103.237.5 Tue Nov 23 12:24:29 PST 1999 >>> Executing as root@142.103.237.6 Tue Nov 23 12:24:33 PST 1999 >>> Executing as root@142.103.237.7 Tue Nov 23 12:24:30 PST 1999 >>> Executing as root@142.103.237.8 Tue Nov 23 12:24:34 PST 1999 >>> Executing as root@142.103.237.9 Tue Nov 23 12:24:38 PST 1999 >>> Executing as root@142.103.237.10 Tue Nov 23 12:24:36 PST 1999 >>> Executing as root@142.103.237.11 Tue Nov 23 12:24:40 PST 1999 >>> Executing as root@142.103.237.12 Tue Nov 23 12:24:41 PST 1999 >>> Executing as root@142.103.237.13 Tue Nov 23 12:24:41 PST 1999 >>> Executing as root@142.103.237.14 Tue Nov 23 12:24:42 PST 1999 >>> Executing as root@142.103.237.15 Tue Nov 23 12:24:45 PST 1999 >>> Executing as root@142.103.237.16 Tue Nov 23 12:24:46 PST 1999 >>> Executing as root@142.103.237.17 Tue Nov 23 12:24:55 PST 1999 >>> Executing as root@142.103.237.18 Tue Nov 23 12:24:51 PST 1999 >>> Executing as root@142.103.237.19 Tue Nov 23 12:24:54 PST 1999 >>> Executing as root@142.103.237.20 Tue Nov 23 12:24:56 PST 1999 >>> 
Executing as root@142.103.237.21 Tue Nov 23 12:24:55 PST 1999 >>> Executing as root@142.103.237.22 Tue Nov 23 12:24:55 PST 1999 >>> Executing as root@142.103.237.23 Tue Nov 23 12:24:53 PST 1999 >>> Executing as root@142.103.237.24 Tue Nov 23 12:24:50 PST 1999 >>> Executing as root@142.103.237.25 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.26 Tue Nov 23 12:24:56 PST 1999 >>> Executing as root@142.103.237.27 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.28 Tue Nov 23 12:25:01 PST 1999 >>> Executing as root@142.103.237.29 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.30 Tue Nov 23 12:24:57 PST 1999 >>> Executing as root@142.103.237.31 Tue Nov 23 12:25:00 PST 1999 >>> Executing as root@142.103.237.32 Tue Nov 23 12:25:03 PST 1999 >>> Executing as root@142.103.237.33 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.34 Tue Nov 23 12:25:01 PST 1999 >>> Executing as root@142.103.237.35 Tue Nov 23 12:24:56 PST 1999 >>> Executing as root@142.103.237.36 Tue Nov 23 12:24:57 PST 1999 >>> Executing as root@142.103.237.37 Tue Nov 23 12:24:56 PST 1999 >>> Executing as root@142.103.237.38 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.39 Tue Nov 23 12:24:58 PST 1999 >>> Executing as root@142.103.237.40 Tue Nov 23 12:25:05 PST 1999 >>> Executing as root@142.103.237.41 Tue Nov 23 12:25:04 PST 1999 >>> Executing as root@142.103.237.42 Tue Nov 23 12:25:02 PST 1999 >>> Executing as root@142.103.237.43 Secure connection to 142.103.237.43 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:29:43 PST 1999 >>> Executing as root@142.103.237.44 Secure connection to 142.103.237.44 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. 
Tue Nov 23 12:31:10 PST 1999 >>> Executing as root@142.103.237.45 Tue Nov 23 12:36:16 PST 1999 >>> Executing as root@142.103.237.46 Tue Nov 23 12:28:02 PST 1999 >>> Executing as root@142.103.237.47 Tue Nov 23 12:32:55 PST 1999 >>> Executing as root@142.103.237.48 Secure connection to 142.103.237.48 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:31:40 PST 1999 >>> Executing as root@142.103.237.49 Secure connection to 142.103.237.49 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:30:12 PST 1999 >>> Executing as root@142.103.237.50 Secure connection to 142.103.237.50 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:27:58 PST 1999 >>> Executing as root@142.103.237.51 Secure connection to 142.103.237.51 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:30:32 PST 1999 >>> Executing as root@142.103.237.52 Secure connection to 142.103.237.52 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:33:21 PST 1999 >>> Executing as root@142.103.237.53 Secure connection to 142.103.237.53 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Wed Nov 24 04:29:10 PST 1999 >>> Executing as root@142.103.237.54 Secure connection to 142.103.237.54 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:29:01 PST 1999 >>> Executing as root@142.103.237.55 Secure connection to 142.103.237.55 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:28:39 PST 1999 >>> Executing as root@142.103.237.56 Secure connection to 142.103.237.56 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. 
142.103.237.56: No route to host >>> Executing as root@142.103.237.57 Secure connection to 142.103.237.57 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:28:30 PST 1999 >>> Executing as root@142.103.237.58 Secure connection to 142.103.237.58 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:28:49 PST 1999 >>> Executing as root@142.103.237.59 Secure connection to 142.103.237.59 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 00:31:28 PST 1999 >>> Executing as root@142.103.237.60 Secure connection to 142.103.237.60 refused; reverting to insecure method. Using rsh. WARNING: Connection will not be encrypted. Tue Nov 23 12:31:32 PST 1999 ############################################################ Tue Nov 23 12:29:42 PST 1999 ############################################################ (1) Taking down vn59, vn60 ... if vn59 can't be fixed trivially, will make vn60 into vn59 (2) vn56 needed BIOS attention (halt on all errors -> halt on no errors) vnallbgCommand hostname | sort > /tmp/HOSTNAME >>> Executing as root@142.103.237.1 >>> Executing as root@142.103.237.10 >>> Executing as root@142.103.237.11 >>> Executing as root@142.103.237.12 >>> Executing as root@142.103.237.13 >>> Executing as root@142.103.237.14 >>> Executing as root@142.103.237.15 >>> Executing as root@142.103.237.16 >>> Executing as root@142.103.237.17 >>> Executing as root@142.103.237.18 >>> Executing as root@142.103.237.19 >>> Executing as root@142.103.237.2 >>> Executing as root@142.103.237.20 >>> Executing as root@142.103.237.21 >>> Executing as root@142.103.237.22 >>> Executing as root@142.103.237.225 >>> Executing as root@142.103.237.226 >>> Executing as root@142.103.237.227 >>> Executing as root@142.103.237.23 >>> Executing as root@142.103.237.24 >>> Executing as root@142.103.237.25 >>> Executing as root@142.103.237.26 >>> 
Executing as root@142.103.237.27 >>> Executing as root@142.103.237.28 >>> Executing as root@142.103.237.29 >>> Executing as root@142.103.237.3 >>> Executing as root@142.103.237.30 >>> Executing as root@142.103.237.31 >>> Executing as root@142.103.237.32 >>> Executing as root@142.103.237.33 >>> Executing as root@142.103.237.34 >>> Executing as root@142.103.237.35 >>> Executing as root@142.103.237.36 >>> Executing as root@142.103.237.37 >>> Executing as root@142.103.237.38 >>> Executing as root@142.103.237.39 >>> Executing as root@142.103.237.4 >>> Executing as root@142.103.237.40 >>> Executing as root@142.103.237.41 >>> Executing as root@142.103.237.42 >>> Executing as root@142.103.237.43 >>> Executing as root@142.103.237.44 >>> Executing as root@142.103.237.45 >>> Executing as root@142.103.237.46 >>> Executing as root@142.103.237.47 >>> Executing as root@142.103.237.48 >>> Executing as root@142.103.237.49 >>> Executing as root@142.103.237.5 >>> Executing as root@142.103.237.50 >>> Executing as root@142.103.237.51 >>> Executing as root@142.103.237.52 >>> Executing as root@142.103.237.53 >>> Executing as root@142.103.237.54 >>> Executing as root@142.103.237.55 >>> Executing as root@142.103.237.56 >>> Executing as root@142.103.237.57 >>> Executing as root@142.103.237.58 >>> Executing as root@142.103.237.59 >>> Executing as root@142.103.237.6 >>> Executing as root@142.103.237.7 >>> Executing as root@142.103.237.8 >>> Executing as root@142.103.237.9 vn1.physics.ubc.ca vn10.physics.ubc.ca vn11.physics.ubc.ca vn12.physics.ubc.ca vn13.physics.ubc.ca vn14.physics.ubc.ca vn15.physics.ubc.ca vn16.physics.ubc.ca vn17.physics.ubc.ca vn18.physics.ubc.ca vn19.physics.ubc.ca vn2.physics.ubc.ca vn20.physics.ubc.ca vn21.physics.ubc.ca vn22.physics.ubc.ca vn23.physics.ubc.ca vn24.physics.ubc.ca vn25.physics.ubc.ca vn26.physics.ubc.ca vn27.physics.ubc.ca vn28.physics.ubc.ca vn29.physics.ubc.ca vn3.physics.ubc.ca vn30.physics.ubc.ca vn31.physics.ubc.ca vn32.physics.ubc.ca 
vn33.physics.ubc.ca vn34.physics.ubc.ca vn35.physics.ubc.ca vn36.physics.ubc.ca vn37.physics.ubc.ca vn38.physics.ubc.ca vn39.physics.ubc.ca vn4.physics.ubc.ca vn40.physics.ubc.ca vn41.physics.ubc.ca vn42.physics.ubc.ca vn43.physics.ubc.ca vn44.physics.ubc.ca vn45.physics.ubc.ca vn46.physics.ubc.ca vn47.physics.ubc.ca vn48.physics.ubc.ca vn49.physics.ubc.ca vn5.physics.ubc.ca vn50.physics.ubc.ca vn51.physics.ubc.ca vn52.physics.ubc.ca vn53.physics.ubc.ca vn54.physics.ubc.ca vn55.physics.ubc.ca vn56.physics.ubc.ca vn57.physics.ubc.ca vn58.physics.ubc.ca vn59.physics.ubc.ca vn6.physics.ubc.ca vn7.physics.ubc.ca vn8.physics.ubc.ca vn9.physics.ubc.ca vnfe1.physics.ubc.ca vnfe2.physics.ubc.ca vnfe3.physics.ubc.ca *** 1-59 OK at current time ############################################################ Tue Nov 23 16:43:26 PST 1999 ############################################################ ############################################################ ### Secondary set-up of new nodes ### ### vn43 through vn59 ############################################################ ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # As matt@laplace, cds, vi vnSssInstall, which is to be cut-and-paste # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 cd /tmp ftp vnfe1 < 17-48 ssh root@vnfe1 "cat /d/vnfe1/home/matt/system/vn/image/master/etc/fstab_frag" cds vnRemote vnNFSsetup NFS apparently set-up on vn45.physics.ubc.ca NFS apparently set-up on vn46.physics.ubc.ca NFS apparently set-up on vn47.physics.ubc.ca ###--------------------------------------------------------- ### (3) Matt setup ###--------------------------------------------------------- vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Missing vn45 --- vn47 due to NFS mounting "updated" password files # Update 
master_authorized_keys ###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow vnDistEtc resolv.conf ###--------------------------------------------------------- ### (5) MPI ###--------------------------------------------------------- vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall vnMakeMPIMachines 1 59 /usr/local/util/machines/machines.LINUX vnallbgCommand 'cd /usr/tmp/install/mpich/examples/basic/; make' #As root@vn43 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 17 cpi Process 0 on vn43.physics.ubc.ca Process 1 on vn44.physics.ubc.ca Process 2 on vn45.physics.ubc.ca Process 5 on vn48.physics.ubc.ca Process 4 on vn47.physics.ubc.ca Process 3 on vn46.physics.ubc.ca Process 6 on vn49.physics.ubc.ca Process 8 on vn51.physics.ubc.ca Process 7 on vn50.physics.ubc.ca Process 9 on vn52.physics.ubc.ca Process 10 on vn53.physics.ubc.ca Process 11 on vn54.physics.ubc.ca Process 13 on vn56.physics.ubc.ca Process 12 on vn55.physics.ubc.ca Process 14 on vn57.physics.ubc.ca Process 15 on vn58.physics.ubc.ca Process 16 on vn59.physics.ubc.ca pi is approximately 3.1416009869231241, Error is 0.0000083333333309 wall clock time = 0.006573 ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install* /var/tmp/install; cd /var/tmp/install; ls' ###--------------------------------------------------------- ### (7) RNPL (needs to wait until MPI is installed) ###--------------------------------------------------------- rnpl vnallbgCommand "cdi; Install rnpl" vnallCommand "cd /usr/local/lib; strings librnpl.a" > /tmp/rnpl vi /tmp/rnpl OK ###--------------------------------------------------------- ### (8) MWC_UTIL 
###--------------------------------------------------------- rvs vnallbgCommand "cdi; Install rvs" vnallCommand "cd /usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallbgCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallbgCommand "cdi; Install vutil" vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallbgCommand "cdi; Install utilio" vnallbgCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio OK utilmath vnallbgCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallbgCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt /usr/local/bin/femkgcnad || echo Deleted" OK sv vnallbgCommand "cdi; ./Installz sv" vnallCommand "cd /usr/local/lib; strings libsv.a" > /tmp/sv **** PROBLEMS WITH ISO OTHERWISE OK jvs # NEEDS TO BE INSTALLED SINCE CONFIG LOOKS FOR OWN LIBRARY vnallbgCommand "cdi; ./Installz jvs" vnallbgCommand "cdi; ./Installz jvs" vnallCommand "cd /usr/local/lib; strings libjvs.a" > /tmp/jvs vnallCommand "cd /usr/local/bin; ls -lt j*" >> /tmp/jvs ###--------------------------------------------------------- ### (9) Numeric/Scientific Libraries ###--------------------------------------------------------- linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' OK odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' OK lapack vnallbgCommand 'cdi; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd 
netlib_lapack; setenv FFLAGS "-O3"; make blaslib; make lapacklib; /bin/cp /usr/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/bin; /bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/libblas.a' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/liblapack.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/liblapack.a' OK fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' OK ###--------------------------------------------------------- ### End of secondary set-up ### Tue Nov 23 18:59:52 PST 1999 ###--------------------------------------------------------- ############################################################ Tue Nov 23 19:00:22 PST 1999 ############################################################ (1) Cleanup of core files in /root # Restored vnallCommand, vnallbgCommand to use all nodes vnallbgCommand 'test -f core && /bin/rm -f core' vnallbgCommand 'ls core' (2) Make nodes "public" ############################################################ Tue Nov 23 20:08:07 PST 1999 ############################################################ (1) Tracked down SDF bug Dave found---updating RNPL on vn and bh machines vnallbgCommand "cdi; Install rnpl" vnallCommand 'ls -lt /usr/local/bin/rnpl' (2) Noticed that I'm using "-O3" on bh, "-O2" on vn, should redo vn with "-O3" vnallbgCommand "cdi; setenv CFLAGS '-O3'; setenv FFLAGS '-O3'; Install rnpl" ############################################################ Tue Nov 23 21:05:16 PST 1999 ############################################################ (1) Trying to attach xterm from each node to rar0502.net.ubc.ca Host 'vn33' added to the list of known hosts. 
_X11TransSocketINETConnect: Can't get address for rar0502.net.ubc.ca konsole: cannot connect to X server rar0502.net.ubc.ca:0 Host key not found from the list of known hosts. ` ssh matt@vn33 'setenv DISPLAY rar0502.net.ubc.ca:0; xterm' _X11TransSocketINETConnect: Can't get address for rar0502.net.ubc.ca xterm Xt error: Can't open display: rar0502.net.ubc.ca:0 32 and 34 work [matt@vn33 /etc]$ nslookup rar0502.net.ubc.ca Server: vn33.physics.ubc.ca Address: 0.0.0.0 *** vn33.physics.ubc.ca can't find rar0502.net.ubc.ca: No response from server ############################################################ ### /etc/resolv.conf ############################################################ ... needs fixing search physics.ubc.ca nameserver 137.82.1.1 nameserver 137.82.28.3 nameserver 142.103.236.1 # Backgrounded all scp's in vnDistEtc vnDistEtc resolv.conf ############################################################ CRASH_10 ###################################################################### Wed Nov 24 06:25:52 PST 1999 (1) vn43 down System DOA as per vn38 etc. previously Shutting down vn59, swapped in vn43's disks, bringing up as vn43 vn43 up, vn59 labelled for shipout, vn60 still here!! DONE ############################################################ Wed Nov 24 08:12:44 PST 1999 ###################################################################### (1) Trying to reset security on switch Doesn't seem to work---need to bring laptop in and try it from the serial line. Wed Nov 24 15:02:45 PST 1999 Have laptop and serial line Passwords apparently *were* reset, reset again (buggered it up the first time), then initialized via Web (added only manager account, "root"), nodes can view, vnfe1 can manage, as can laplace and rar0502.net.ubc.ca OK ############################################################ Thu Nov 25 07:29:17 PST 1999 ###################################################################### (1) Transmission rate to cluster is < 10 base-T. 
Sent message to Dennis.OReilly@ubc.ca who replied Hi Matt, Thanks for reporting the problem. The backbone network equipment is capable of routing packets at full 1000Mbps speeds with a maximum per packet latency of less than 7 microseconds through the router. There must be some problem or anomaly that is causing the poor performance. I am surprised that your switch is reporting late collisions. That is not a good sign. If at all possible you should be using full duplex ethernet which makes collisions impossible. One possible problem is that there is a configuration mismatch between your switch and the backbone device. Both have to agree on whether the link is half duplex or full duplex. If the settings don't match that would account for the problem. Please give me a call anytime tomorrow (822-3072). I would like to come on site (either downstairs or in the Hennings building) to see what is happening for myself. Thanks Dennis Examined state via browser and A1 is running 100HDX, ports were auto so perhaps switch needed to be powered off after network was upgraded?? Changed to 100FDX. Will retime ftp of matt@bh1:~/autoconf/jdk_1.1.7-v3-glibc-x86.tar.gz 11550456 bytes sent in 0.986 secs (1.1e+04 Kbytes/sec) Then tried putting port A1 back into AUTO-sense mode, re-negotiated 100HDX so will leave hard-coded at 100FDX for time being Last Excessive Late Collisions TimeStamp 25-Nov-99 7:35:43 AM OK ############################################################ Thu Nov 25 07:58:43 PST 1999 ############################################################ (1) Can't rdump etc. 
from bh[]/laplace/godel, probably need to put entries into /etc/hosts.allow laplace.physics.ubc.ca 142.103.234.31 godel.physics.ubc.ca 142.103.234.22 bh1.physics.ubc.ca 142.103.235.51 bh2.physics.ubc.ca 142.103.235.52 bh3.physics.ubc.ca 142.103.235.53 bh4.physics.ubc.ca 142.103.235.54 bh5.physics.ubc.ca 142.103.235.55 bh6.physics.ubc.ca 142.103.235.56 rar0502.net.ubc.ca 142.103.175.48 etc Arc * # modify hosts.allow vnDistEtc hosts.allow vnallbgCommand 'killall -HUP inetd' OK ############################################################ Thu Nov 25 08:36:16 PST 1999 ############################################################ bhbgCommand 'cd /var/tmp/install/prdump/src; prdump vnfe1 2000000 100 gets A1 port pretty much up to 100 Mbit ############################################################ Thu Nov 25 09:11:19 PST 1999 ############################################################ ACTION (1) Can't start up services such as rdumpser as non-super-user ############################################################ Fri Nov 26 08:00:03 PST 1999 ############################################################ (1) Adding Bill Unruh, screwed up, gave him /d/vnfe3/home/unruh, instead of /d/vnfe1/home/unruh (2) Reinstalling MPI on vnfe1, also noticed that probably hadn't remade machines file /usr/local/util/machines/machines.LINUX vnMakeMPIMachines 1 58 serv_p4.c:1: p4.h: No such file or directory serv_p4.c:2: p4_sys.h: No such file or directory make[1]: *** [server.o] Error 1 ACTION NEED TO CHECK vnMPIinstall ############################################################ Mon Nov 29 14:01:28 PST 1999 ############################################################ (1) Adding Silviu from Bushe's group nu vnNewUsers silviu ############################################################ Mon Nov 29 19:18:34 PST 1999 ############################################################ From Kendal #Also, machines vn41 and vn42 don't mount my home partition, so I can't #log in to them right now. 
vnallCommand mount -a #Also, the times on the cluster aren't synchronized, which makes it a #bit of a mess when I try to compile things. Maybe we could set the #machines up to get the time from an atomic clock someplace. ############################################################ Tue Nov 30 14:22:11 PST 1999 ############################################################ (1) Nodes vn59, vn60, vn61, vn62, vn63 delivered from Varsity. Powering up # Node vn59's fan does not start!! # vn63 <-> vn59 --> vn63 to shop (2) Nodes 59, 60, 61, 62 up and running, had to use linuxconf to turn on/off some services 1: vn43 up 6+06:57, 0 users, load 0.00, 0.00, 0.00 . . . 10: vn59 up 0:25, 1 user, load 0.00, 0.00, 0.00 11: vn6 up 11+16:40, 0 users, load 0.00, 0.00, 0.00 12: vn60 up 0:25, 1 user, load 0.00, 0.00, 0.00 13: vn61 up 0:12, 3 users, load 0.00, 0.02, 0.02 14: vn62 up 0:13, 1 user, load 0.00, 0.01, 0.01 . . . ############################################################ Tue Nov 30 15:14:52 PST 1999 ############################################################ ############################################################ ### Secondary set-up of new nodes ### ### vn59, vn60, vn61, vn62 ############################################################ ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 cd /tmp ftp vnfe1 < 59->62 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (3) Matt setup ###--------------------------------------------------------- vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Missing vn45 --- vn47 due to NFS mounting "updated" password files # Update master_authorized_keys ###--------------------------------------------------------- ### (4) /etc setup 
###--------------------------------------------------------- # Restore Matt's passwd # Modify ~matt/scripts/vnN to include new nodes etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow vnDistEtc resolv.conf ###--------------------------------------------------------- ### (5) MPI ###--------------------------------------------------------- # Temporarily modify vnallbgCommand, vnallCommand to use vnnewN vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall vnMakeMPIMachines 1 62 /usr/local/util/machines/machines.LINUX vnallbgCommand 'cd /usr/tmp/install/mpich/examples/basic/; make' #As root@vn59 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 4 cpi Process 0 on vn59.physics.ubc.ca Process 1 on vn60.physics.ubc.ca Process 2 on vn61.physics.ubc.ca Process 3 on vn62.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 wall clock time = 0.002494 #As matt@vnfe1 cd demo/basic [matt@vnfe1 basic]$ mpirun -np 65 cpi Process 0 on vnfe1.physics.ubc.ca Process 1 on vnfe2.physics.ubc.ca Process 2 on vnfe3.physics.ubc.ca Process 3 on vn1.physics.ubc.ca Process 5 on vn3.physics.ubc.ca Process 6 on vn4.physics.ubc.ca Process 4 on vn2.physics.ubc.ca Process 7 on vn5.physics.ubc.ca Process 8 on vn6.physics.ubc.ca Process 11 on vn9.physics.ubc.ca Process 17 on vn15.physics.ubc.ca Process 21 on vn19.physics.ubc.ca Process 35 on vn33.physics.ubc.ca Process 40 on vn38.physics.ubc.ca Process 22 on vn20.physics.ubc.ca Process 36 on vn34.physics.ubc.ca Process 44 on vn42.physics.ubc.ca Process 62 on vn60.physics.ubc.ca Process 54 on vn52.physics.ubc.ca Process 58 on vn56.physics.ubc.ca Process 61 on vn59.physics.ubc.ca Process 60 on vn58.physics.ubc.ca Process 42 on vn40.physics.ubc.ca Process 64 on vn62.physics.ubc.ca Process 46 on vn44.physics.ubc.ca Process 57 on vn55.physics.ubc.ca Process 56 on vn54.physics.ubc.ca Process 24 on vn22.physics.ubc.ca Process 43 on vn41.physics.ubc.ca Process 59 on vn57.physics.ubc.ca Process 23 on vn21.physics.ubc.ca 
Process 26 on vn24.physics.ubc.ca Process 63 on vn61.physics.ubc.ca Process 53 on vn51.physics.ubc.ca Process 52 on vn50.physics.ubc.ca Process 50 on vn48.physics.ubc.ca Process 45 on vn43.physics.ubc.ca Process 55 on vn53.physics.ubc.ca Process 25 on vn23.physics.ubc.ca Process 48 on vn46.physics.ubc.ca Process 51 on vn49.physics.ubc.ca Process 49 on vn47.physics.ubc.ca Process 41 on vn39.physics.ubc.ca Process 30 on vn28.physics.ubc.ca Process 37 on vn35.physics.ubc.ca Process 34 on vn32.physics.ubc.ca Process 29 on vn27.physics.ubc.ca Process 32 on vn30.physics.ubc.ca Process 39 on vn37.physics.ubc.ca Process 33 on vn31.physics.ubc.ca Process 47 on vn45.physics.ubc.ca Process 28 on vn26.physics.ubc.ca Process 31 on vn29.physics.ubc.ca Process 27 on vn25.physics.ubc.ca Process 20 on vn18.physics.ubc.ca Process 16 on vn14.physics.ubc.ca Process 18 on vn16.physics.ubc.ca Process 19 on vn17.physics.ubc.ca Process 14 on vn12.physics.ubc.ca Process 15 on vn13.physics.ubc.ca Process 12 on vn10.physics.ubc.ca Process 13 on vn11.physics.ubc.ca Process 10 on vn8.physics.ubc.ca Process 9 on vn7.physics.ubc.ca Process 38 on vn36.physics.ubc.ca pi is approximately 3.1416009869231245, Error is 0.0000083333333314 ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install Installz /var/tmp/install; cd /var/tmp/install; ls' ###--------------------------------------------------------- ### (7) RNPL (needs to wait until MPI is installed) ###--------------------------------------------------------- rnpl vnallbgCommand "cdi; Install rnpl" vnallCommand "cd /usr/local/lib; strings librnpl.a" > /tmp/rnpl vi /tmp/rnpl OK ###--------------------------------------------------------- ### (8) MWC_UTIL ###--------------------------------------------------------- rvs vnallbgCommand "cdi; Install rvs" vnallCommand "cd 
/usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallbgCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallbgCommand "cdi; Install vutil" vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallbgCommand "cdi; Install utilio" vnallbgCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio vnallCommand "ls /usr/local/bin/nss" OK utilmath vnallbgCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallbgCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt /usr/local/bin/femkgcnad || echo Deleted" OK sv vnallbgCommand "cdi; ./Installz sv" vnallCommand "cd /usr/local/lib; strings libsv.a" > /tmp/sv **** PROBLEMS WITH ISO OTHERWISE OK jvs # NEEDS TO BE INSTALLED TWICE SINCE CONFIG LOOKS FOR OWN LIBRARY vnallbgCommand "cdi; ./Installz jvs" vnallbgCommand "cdi; ./Installz jvs" vnallCommand "cd /usr/local/lib; strings libjvs.a" > /tmp/jvs vnallCommand "cd /usr/local/bin; ls -lt j*" >> /tmp/jvs OK ###--------------------------------------------------------- ### (9) Numeric/Scientific Libraries ###--------------------------------------------------------- linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' OK odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' OK lapack vnallbgCommand 'cdi; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd netlib_lapack; setenv FFLAGS "-O3"; make blaslib; make lapacklib; /bin/cp 
/usr/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/bin; /bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/libblas.a' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/liblapack.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/liblapack.a' OK fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' OK ### GMP---probably should do globally?? (2) vnallbgCommand 'cdi; ./Installz gmp-2.0.2' vnallCommand 'ls -lt /usr/local/lib/libgmp.a' OK ###--------------------------------------------------------- ### Tue Nov 30 16:27:37 PST 1999 ### End of secondary set-up ###--------------------------------------------------------- ############################################################ Tue Nov 30 20:46:40 PST 1999 ############################################################ (1) After some success with NTP on laptop, will try configuring front-ends to run at NTP stratum 3, have nodes run off front ends. vnallbgCommand 'cdi; Installz ntp-4.0.98f' vnallCommand 'cd /usr/local/bin; ls -lt ntp*' OK #Coded vnDistNTP # AND accidentally killed vnfe1 due to deletion of # die() routine in vnDistNTP and existence of ~matt/scripts/die # Renamed die as DIE on vn machines # vnallCommand 'cd /etc; mv ntp.conf ntp.conf.O; ls -lt ntp*' vnDistNTP vnallCommand 'ls /etc/ntp.conf' foreach i (1 2 3) ssh root@vnfe${i} ntptimeset end vnallbgCommand 'touch /etc/ntp.drift' etc # Modify rc.local vnallbgCommand 'cd /etc/rc.d; CP ~matt/system/vn/image/master/etc/rc.local .' # Started up ntpd on vnfe1, vnfe2, vnfe3 ... clocks set after 6-7 # minutes Your clock is off by 0.0044062 seconds. (63.192.96.2) [15/15] Your clock is off by 0.0031289 seconds. 
(63.192.96.2) [15/15] Your clock is off by 0.0136401 seconds. (63.192.96.2) [15/15] # As root@vn62 ntptimeset Your clock is off by -17.2942020 seconds. (142.103.237.227) [15/15] # Hack 'vnallbgCommand' # /usr/local/bin/ntpd not found on 56 (due to bad date, ironically!) vnallbgCommand 'ps -Alf | grep ntp' vnallCommand ntptimeset #### # vn60, vn61 # # have bad time zones setup # does the trick ############################################################ # Front end /etc/ntp.conf # peer configuration for hosts vnfe[123].physics.ubc.ca # (expected to operate at stratum 3 or higher) server 132.246.168.148 server 63.192.96.2 server 192.35.82.50 driftfile /etc/ntp.drift ############################################################ ############################################################ # Node /etc/ntp.conf # peer configuration for hosts # vn1.physics.ubc.ca -- vn64.physics.ubc.ca # (expected to operate at stratum 4, off vnfe[123] server 142.103.237.225 server 142.103.237.226 server 142.103.237.227 driftfile /etc/ntp.drift ############################################################ ############################################################ Wed Dec 1 11:57:21 PST 1999 ############################################################ See README.CRASH (12) #vn63 back from shop with fan working # vn61, vn62 power supplies replaced on site # Bringing up vn61, vn62, vn63 # All up, vn63 will need secondary configuration ############################################################ Wed Dec 1 15:52:04 PST 1999 ############################################################ (1) Adding Josh Hacker from Stull's group in Ocean. vnNewUsers josh_hacker Noticed that vn59 is down. 
***** Thu Feb 21 10:33:35 PST 2002: For comparison vis a vis my attempt to get access to monster.geog.ubc.ca for benchmarking of Itanium 4-ways with gigabit From jhack@phoenix.geog.ubc.ca Wed Dec 1 13:54:24 1999 From: Josh Hacker 1) Full Name: Josh Hacker 2) Preferred Login Name: atsci 3) Alternate Login Name (if preferred unavailable/not allowed): mc2 4) Group (see notes below): Other 5) Preferred e-mail: jhack@geog.ubc.ca 6) Contact Phone Number: 2-6620 7) Preferred Shell (see notes below): tcsh FILL OUT THE FOLLOWING ITEM *ONLY* IF YOU LISTED "OTHER" AS YOUR GROUP 8) Title (Faculty, post-doc, grad student, undergrad ...) and brief description of anticipated usage of cluster I am a grad student working with Roland Stull in atmospheric sciences. We have a working PC linux version of the MC2 meteorological model and would like to run a set of benchmarks. Runs will test the ability of the cluster to scale the job by using different multi-PE configurations and different size jobs. We may try to use up to 64 PEs if they are available. From matt Wed Dec 1 13:56:44 1999 To: Josh Hacker Subject: Re: request for vn account Hi Josh: Thanks for your message. Will be glad to set you up an account. If you can send me an encrypted password(*) that would be great, otherwise we'll have to do it "in person" somehow. We should have a chat in any case to discuss how best to get your stuff benchmarked ... Cheers ... Matt (*) I.e. the encrypted password from one of your local Linux systems, ask your sysadmin if you're unsure, unable to get it. 
Account was set up immediately, Josh visited the next day and then for the next few weeks getting benchmarks going ############################################################ CRASH_13 ############################################################ # vn59 is down (was vn61 this morning) # Shutdown (reboot) vn63 # Take down vn63 <-> vn59 Thu Dec 2 11:14:13 PST 1999 # vn63's power supply replaced # vn63 comes up OK, needs secondary configuration ############################################################ Wed Dec 1 16:31:51 PST 1999 # vn59 back up, need to change known_hosts vnallbgCommand 'CP /d/vnfe1/home/matt/system/vn/image/master/ssh/known_hosts /root/.ssh' ############################################################ Wed Dec 1 18:49:14 PST 1999 ############################################################ (1) Adding Tim Shannon tshannon:x:9002:9000:Tim Shannon:/d/vnfe3/home/tshannon:/bin/tcsh vnNewUsers tim_shannon Thu Dec 2 11:16:28 PST 1999 Trashed tim's passwd, re-creating account # As root@vnfe1 /bin/rm -rf /d/vnfe3/home/tshannon # As matt@vnfe1 etc # vi passwd vnDistEtc passwd vnNewUsers tim_shannon (2) Fixing up 'passwd.broken' as root@vnfe1:/usr/bin ### NO!! 
Figure out how to use YP ############################################################ Tue Dec 7 22:14:13 PST 1999 ############################################################ (1) vn20 is down, adding Joachim Stadel's account Was pretty hot in Computer Room --- got them to crank up the AC see README.CRASH (CRASH_14) ############################################################ Wed Dec 8 08:29:52 PST 1999 ############################################################ (1) Adding Pedro Maronetti's account ############################################################ Wed Dec 8 15:09:44 PST 1999 ############################################################ (1) Installing vn64, powers up fine, comes up fine NEED TO DO SECONDARY CONFIGURATION ON vn63 vn64 ############################################################ Wed Dec 8 15:32:34 PST 1999 ############################################################ (1) vn61 went incommunicado while we were installing vn64 Still pingable, connecting video see README.CRASH (CRASH_15) ############################################################ Wed Dec 8 15:12:38 PST 1999 ############################################################ ############################################################ ### Secondary set-up of new nodes ### ### vn63, vn64 ############################################################ ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 cd /tmp ftp vnfe1 < 59->62 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (3) Matt setup ###--------------------------------------------------------- vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Missing vn45 --- vn47 due to NFS mounting "updated" password files # Update master_authorized_keys 
###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd # Modify ~matt/scripts/vnN to include new nodes etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow vnDistEtc resolv.conf ###--------------------------------------------------------- ### (5) MPI ###--------------------------------------------------------- # Temporarily modify vnallbgCommand, vnallCommand to use vnnewN vnallbgCommand /d/vnfe1/home/matt/scripts/vnMPIinstall vnMakeMPIMachines 1 64 /usr/local/util/machines/machines.LINUX vnallbgCommand 'cd /usr/tmp/install/mpich/examples/basic/; make' As root@vn63 cd /usr/tmp/install/mpich/examples/basic/; mpirun -np 2 cpi Process 0 on vn63.physics.ubc.ca Process 1 on vn64.physics.ubc.ca pi is approximately 3.1416009869231241, Error is 0.0000083333333309 wall clock time = 0.000771 #As matt@vnfe1 cd demo/basic mpiset sh mpirun -np 67 cpi 2&>1 > /tmp/mpi67 # Seems to work ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install Installz /var/tmp/install; cd /var/tmp/install; ls' ###--------------------------------------------------------- ### (7) RNPL (needs to wait until MPI is installed) ###--------------------------------------------------------- rnpl vnallbgCommand "cdi; Install rnpl" vnallCommand "cd /usr/local/lib; strings librnpl.a" > /tmp/rnpl vi /tmp/rnpl OK ###--------------------------------------------------------- ### (8) MWC_UTIL ###--------------------------------------------------------- rvs vnallbgCommand "cdi; Install rvs" vnallCommand "cd /usr/local/lib; strings libvs.a" > /tmp/rvs vi /tmp/rvs OK rvsso vnallbgCommand "cdi; Install rvsso" vnallCommand "cd /usr/local/lib; strings libvsso.a" > /tmp/rvsso vi /tmp/rvsso OK vutil vnallbgCommand "cdi; Install vutil" 
vnallCommand "cd /usr/local/lib; strings libvutil.a" > /tmp/vutil vi /tmp/vutil OK utilio (x 2) vnallbgCommand "cdi; Install utilio" vnallbgCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio vnallCommand "ls /usr/local/bin/nss" OK utilmath vnallbgCommand "cdi; Install utilmath" vnallCommand "cd /usr/local/lib; strings libutilmath.a" > /tmp/utilmath vi /tmp/utilmath OK emkgcnad vnallbgCommand "cdi; Install emkgcnad" vnallCommand "cd /usr/local/bin; ls -lt emkgcnad femkgcnad" > /tmp/emkgcnad vi /tmp/emkgcnad vnallCommand "test -f /usr/local/bin/emkgcnad && /bin/rm -f /usr/local/bin/emkgcnad; ls -lt /usr/local/bin/emkgcnad || echo Deleted" vnallCommand "test -f /usr/local/bin/femkgcnad && /bin/rm -f /usr/local/bin/femkgcnad; ls -lt /usr/local/bin/femkgcnad || echo Deleted" OK sv vnallbgCommand "cdi; ./Installz sv" vnallCommand "cd /usr/local/lib; strings libsv.a" > /tmp/sv **** PROBLEMS WITH ISO OTHERWISE OK jvs # NEEDS TO BE INSTALLED TWICE SINCE CONFIG LOOKS FOR OWN LIBRARY vnallbgCommand "cdi; ./Installz jvs" vnallbgCommand "cdi; ./Installz jvs" vnallCommand "cd /usr/local/lib; strings libjvs.a" > /tmp/jvs vnallCommand "cd /usr/local/bin; ls -lt j*" >> /tmp/jvs OK ###--------------------------------------------------------- ### (9) Numeric/Scientific Libraries ###--------------------------------------------------------- linpack vnallbgCommand 'cdi; Installz netlib_linpack' vnallCommand 'ls /usr/local/lib/liblinpack.a' OK odepack vnallbgCommand 'cdi; Installz netlib_odepack' vnallCommand 'ls /usr/local/lib/libodepack.a' OK lapack vnallbgCommand 'cdi; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd netlib_lapack; setenv FFLAGS "-O3"; make blaslib; make lapacklib; /bin/cp /usr/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/bin; /bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a' vnallCommand '/bin/cp 
/var/tmp/install/netlib_lapack/BLAS/SRC/libblas.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/libblas.a' vnallCommand 'ls -lt /var/tmp/install/netlib_lapack/liblapack.a' vnallCommand '/bin/cp /var/tmp/install/netlib_lapack/liblapack.a /usr/local/lib' vnallCommand 'ls /usr/local/lib/liblapack.a' OK fftpack vnallbgCommand 'cdi; Installz netlib_fftpack' vnallCommand 'ls /usr/local/lib/libfftpack.a' OK ### GMP---probably should do globally?? (2) vnallbgCommand 'cdi; ./Installz gmp-2.0.2' vnallCommand 'ls -lt /usr/local/lib/libgmp.a' OK ###--------------------------------------------------------- ### (10) NTP ###--------------------------------------------------------- vnDistNTP vnallbgCommand 'cdi; Installz ntp-4.0.98f' vnallCommand 'cd /usr/local/bin; ls -lt ntp*' vnallbgCommand 'cd /etc/rc.d; CP ~matt/system/vn/image/master/etc/rc.local .' vnallbgCommand '/usr/local/bin/ntpd' #As root@vn6[34] ntptimeset ###--------------------------------------------------------- ### Wed Dec 8 17:40:03 PST 1999 ### End of secondary set-up ###--------------------------------------------------------- ############################################################ CRASH_16 ############################################################ Fri Dec 10 20:58:27 PST 1999 (1) vn43 incommunicado (really need to get remote re-boot figured out, also faster disk recovery) Hard reboot, back up at Fri Dec 10 22:00:13 PST 1999 Same problem as CRASH_14 (kernel: eth0, and lots of error messages in log! ... buggy kernel??) 
Dec 10 19:01:02 vn43 PAM_pwdb[32270]: (su) session closed for user news Dec 10 19:12:34 vn43 -- MARK -- Dec 10 19:32:34 vn43 -- MARK -- Dec 10 19:52:34 vn43 -- MARK -- Dec 10 20:01:01 vn43 anacron[32306]: Updated timestamp for job `cron.hourly' to 1999-12-10Dec 10 20:01:01 vn43 PAM_pwdb[32310]: (su) session opened for user news by (uid=9) Dec 10 20:01:02 vn43 PAM_pwdb[32310]: (su) session closed for user news Dec 10 20:12:34 vn43 -- MARK -- Dec 10 20:32:34 vn43 -- MARK -- Dec 10 20:46:54 vn43 kernel: eth0: Transmit timed out: status 0050 0000 at 2/2 command 000ca000. Dec 10 20:46:54 vn43 kernel: eth0: Trying to restart the transmitter... Dec 10 20:46:59 vn43 kernel: eth0: Transmit timed out: status 0050 0000 at 2/2 command 000ca000. Dec 10 20:46:59 vn43 kernel: eth0: Trying to restart the transmitter... Dec 10 20:47:04 vn43 kernel: eth0: Transmit timed out: status 0050 0000 at 2/2 co ############################################################ Definite problem with eth0 ############################################################ NEXT_ACTION: grep logs for eth0 error messages vnallCommand "cd /var/log; grep 'kernel: eth0:' messages | grep -v 'executing remote command' | lino 1 128" > /tmp/ETH0 see README.ETH0.0 http://beowulf.gsfc.nasa.gov/listarchives/linux-eepro100/1999/11/ Greetings, After experiencing the infamous "Transmit timed out" messages from eepro100.c on Intel 82559 boards, Donald suggested I try the version 1.09t from ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/test/eepro100.c. I am pleased to report that this test driver version cured all problems that I had encountered. In addition, I stress-tested this driver with a "SmartBits" 100Mb throughput testing device. With the new driver, the test completed without a single "Transmit timed out" message. With previous drivers, the test results were dismal (and peppered with tx timeouts). 
Here are the results for the new driver, on a PIII 550 running 2.0.36 CONFIG_M686, optimize as router, forwarding UDP packets in router (two card) mode with i82559 boards: http://cesdis.gsfc.nasa.gov/linux/misc/modules.html Frame size Pkts/sec Mbits/sec # Downloading static const char *version = "eepro100.c:v1.09t 9/29/99 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html\n"; # will test on bh1 mkdir -p src/linux/drivers/net # Download eepro100.c # Coded vnPatchEepro100 script bhCommand 'cd /lib/modules/`uname -r`/net/; strings eepro100.o | grep "Donald Becker"' bhCommand 'cd /lib/modules/`uname -r`/net/; strings eepro100.o | grep "Donald Becker"' >>> Executing as root@142.103.235.51 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html >>> Executing as root@142.103.235.52 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html >>> Executing as root@142.103.235.53 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html >>> Executing as root@142.103.235.54 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html >>> Executing as root@142.103.235.55 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html >>> Executing as root@142.103.235.56 eepro100.c:v1.06 10/16/98 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html # As root@bh6 vnPatchEepro100 install gcc -DCONFIG_X86_LOCAL_APIC -D__SMP__ -DMODULE -D__KERNEL__ -Wall -Wstrict-prototypes -O6 -c eepro100.c `[ -f /usr/include/linux/modversions.h ] && echo -DMODVERSIONS` # Compilation problems ... 
############################################################ CRASH_17 ############################################################ Sat Dec 11 12:09:43 PST 1999 (1) vn33 down, odds are on eth0 problem, had one previously vn33 down 1:28 vnallCommand "cd /var/log; grep 'kernel: eth0:' messages* | grep -v 'executing remote command' | lino 1 128" > /tmp/ETH0 grep 'timed out' < /tmp/ETH0 | nth 4 | uniq vn1 vn6 vn20 vn33 vn39 vn43 vn50 Identified 142.103.237.1 142.103.237.6 142.103.237.20 142.103.237.33 142.103.237.39 142.103.237.43 142.103.237.50 # as candidates for hardware replacement, phoned in request # to Varsity, Bill's sending someone over for 2 pm Replaced cards in vn1 vn6 vn20 vn33 vn39 vn43 vn50 ############################################################ Sat Dec 11 15:01:52 PST 1999 ############################################################ (1) Problem with vn4's logging /var/log/messages* 0 size # syslogd wasn't running, started it manually ############################################################ Sat Dec 11 18:55:39 PST 1999 ############################################################ (1) Stress testing network cards # [vn1,vn6] As matt@vn1 cd /d/vnfe1/home/matt/demo/perftest mpirun -p4pg p0 ./mpptest -gnuplot -rate -size 16000 800000 16000 (x 2) # [vn20,vn33] As matt@vn20 cd /d/vnfe1/home/matt/demo/perftest mpirun -p4pg p1 ./mpptest -gnuplot -rate -size 16000 800000 16000 (x 2) # [vn39,vn43] As matt@vn39 cd /d/vnfe1/home/matt/demo/perftest mpirun -p4pg p2 ./mpptest -gnuplot -rate -size 16000 800000 16000 (x 2) # [vn50,vn16] As matt@vn50 cd /d/vnfe1/home/matt/demo/perftest mpirun -p4pg p3 ./mpptest -gnuplot -rate -size 16000 800000 16000 (x 2) # Still getting time out errors (and also on vn16), looks like # will have to try the updated driver and/or go to the 3-COM cards ############################################################ Mon Dec 13 12:25:57 PST 1999 ############################################################ (1) Adding Salcudean users with 
'x' passwords msal:x:1800:1800:Martha Salcudean:/d/vnfe3/home/msal:/bin/tcsh OK nowak:x:1810:1800:Paul Nowak:/d/vnfe3/home/nowak:/bin/tcsh OK fengxs:x:1811:1800:Xioasi Feng:/d/vnfe3/home/fengxs:/bin/tcsh OK bibeau:x:1812:1800:Eric Bibeau:/d/vnfe3/home/bibeau:/bin/tcsh OK bian:x:1813:1800:Zhengbing Bian:/d/vnfe3/home/bian:/bin/tcsh OK galli:x:1814:1800:Mike Georgallis:/d/vnfe3/home/galli:/bin/tcsh OK kegang:x:1815:1800:Kegang Zhang:/d/vnfe3/home/kegang:/bin/tcsh OK hank:x:1816:1800:Dave Stropky:/d/vnfe3/home/hank:/bin/tcsh OK shariati:x:1817:1800:Mohammad Shariati:/d/vnfe3/home/shariati:/bin/tcsh OK jwyuan:x:1818:1800:Jerry Yuan:/d/vnfe3/home/jwyuan:/bin/tcsh OK suqin:x:1819:1800:Suqin Dong:/d/vnfe3/home/suqin:/bin/tcsh cchiu:x:1820:1800:Chris Chiu:/d/vnfe3/home/cchiu:/bin/tcsh hua:x:1821:1800:Lu Hua:/d/vnfe3/home/hua:/bin/tcsh jasonz:x:1822:1800:Xun Zhang:/d/vnfe3/home/jasonz:/bin/tcsh xiao:x:1823:1800:Zhu Zhi Xaio:/d/vnfe3/home/xiao:/bin/tcsh statie:x:1824:1800:Emil Statie:/d/vnfe3/home/statie:/bin/tcsh he:x:1825:1800:Pingfan He:/d/vnfe3/home/he:/bin/tcsh (2) Updating shadow with encrypted passwds ############################################################ CRASH_18 ############################################################ Tue Dec 14 18:17:33 PST 1999 (1) vn50 down at 17:31?? Still ping-able, not manifestly eth0 problem vn50 down 0:46 Dec 14 05:59:44 vn50 sshd[21692]: log: Rhosts with RSA host authentication accepted for matt, matt on vnfe1.physics.ubc.ca. 
Dec 14 05:59:44 vn50 sshd[21694]: log: executing remote command as user matt Dec 14 05:59:45 vn50 sshd[21692]: log: Closing connection to 142.103.237.225 Dec 14 06:01:00 vn50 anacron[21718]: Updated timestamp for job `cron.hourly' to 1999-12-14Dec 14 06:01:00 vn50 PAM_pwdb[21722]: (su) session opened for user news by (uid=9) Dec 14 06:01:00 vn50 PAM_pwdb[21722]: (su) session closed for user news Dec 14 06:17:58 vn50 -- MARK -- Dec 14 06:37:58 vn50 -- MARK -- Dec 14 06:38:26 vn50 sshd[659]: log: Generating new 768 bit RSA key. Dec 14 06:38:26 vn50 sshd[659]: log: RSA key generation complete. Dec 14 06:57:58 vn50 -- MARK -- Dec 14 07:01:00 vn50 anacron[21757]: Updated timestamp for job `cron.hourly' to 1999-12-14Dec 14 07:01:00 vn50 PAM_pwdb[21761]: (su) session opened for user news by (uid=9) Dec 14 07:01:00 vn50 PAM_pwdb[21761]: (su) session closed for user news Dec 14 07:17:58 vn50 -- MARK -- Dec 14 07:37:58 vn50 -- MARK -- ############################################################ CRASH_19 ############################################################ Wed Dec 15 18:18:05 PST 1999 (1) Josh apparently hung up vn60 Dec 15 20:26:42 vn60 sshd[662]: log: Generating new 768 bit RSA key. Dec 15 20:26:43 vn60 sshd[662]: log: RSA key generation complete. Dec 15 20:45:37 vn60 -- MARK -- Dec 15 20:58:41 vn60 sshd[24387]: log: Connection from 142.103.237.227 port 1023 Dec 15 20:58:45 vn60 sshd[24387]: log: Password authentication for atsci accepted. Dec 15 18:01:00 vn60 anacron[24412]: Updated timestamp for job `cron.hourly' to 1999-12-15Dec 15 18:01:00 vn60 PAM_pwdb[24416]: (su) session opened for user news by (uid=9) Dec 15 18:01:01 vn60 PAM_pwdb[24416]: (su) session closed for user news Dec 15 18:45:34 vn60 syslogd 1.3-3: restart. Dec 15 18:45:34 vn60 syslog: syslogd startup succeeded Dec 15 18:45:34 vn60 kernel: klogd 1.3-3, log source = /proc/kmsg started. 
Dec 15 18:45:34 vn60 kernel: Inspecting /boot/System.map-2.2.13-7mdksmp Dec 15 18:45:34 vn60 syslog: klogd startup succeeded ############################################################ Wed Dec 15 20:00:20 PST 1999 ############################################################ (1) Trying to implement cron job/script for rebooting machine if communication to the outside world via ssh to vnfe1 fails (probably should ensure that can't communicate with two or more unrelated machines, but will start with one). Implementing on laptop /home/matt/scripts/vnHello /var/spool/cron As root@rar0502 crontab -e crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. # (/tmp/crontab.1370 installed on Wed Dec 15 20:08:24 1999) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) #min hour daymo month daywk cmd 0,15,30,45 * * * * /home/matt/scripts/vnHello reboot (2) Distributing known_hosts cd ~matt/system/vn/image/master/ssh foreach m (`vnallN`) scp known_hosts root@${m}:~/.ssh end # Wrote ~matt/system/vn/image/master/ssh/Dist vnallCommand 'ls -lt /root/.ssh/known_hosts' (3) vnallCommand 'crontab -l' # None of nodes has crontab, can install from master file, # manually update front-ends (4) Why are/were timestamps from 'sshd' apparently awry (see root@vn60:/var/log/messages)?? (5) vnHello cron job seems to work on laptop, testing on vn60 # As root@vn60 crontab -e crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. 
# (/tmp/crontab.1235 installed on Wed Dec 15 22:18:35 1999) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) #min hour daymo month daywk cmd 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnHello reboot # ~matt/system/vn/image/master/crontabs/the0 #min hour daymo month daywk cmd 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnHello reboot (6) Testing out on a few vn nodes ssh root@vn60 'crontab -l' ssh root@vn60 'crontab -r' # Modify vnNCommand to use "test" nodes vnNCommand 'crontab /d/vnfe1/home/matt/system/vn/image/master/crontabs/the0' vnNCommand 'crontab -l' vnNCommand 'crontab -r' # Test nodes (will try reboot) # vn39, vn50, vn60 Thu Dec 16 08:00:09 PST 1999 (7) Installing on all nodes vnNCommand 'crontab -r' vnNCommand 'crontab -l' # Modify vnNCommand to use all nodes vnNCommand 'crontab /d/vnfe1/home/matt/system/vn/image/master/crontabs/the0' vnNCommand 'crontab -l' vnNCommand 'tail -10 /var/log/cron' (8) 20 seconds might be cutting it a bit fine in vnHello, going to 30 vnHello business may get a little annoying for Jason (bh1) and myself (laplace), could probably equivalently use vnfe1, vnfe2, vnfe3 ############################################################ CRASH_20 ############################################################ Thu Dec 16 09:23:24 PST 1999 (1) vn17 rebooted itself via vnHello, screwed up clock in the process, Roman Petryk was primary victim. Date on vn17 screwy, also vn17 apparently re-started about an hour ago, Roman was running on it. Dec 16 08:08:14 vn17 sshd[12488]: log: ROOT LOGIN as 'root' from vnfe1.physics.ubc.ca Dec 16 08:08:15 vn17 sshd[12490]: log: executing remote command as root: cat /var/log/messages.vnHello Dec 16 08:08:16 vn17 sshd[12488]: log: Closing connection to 142.103.237.225 Dec 16 08:16:45 vn17 gpm[535]: Error in protocol Dec 16 08:16:47 vn17 innd: innd shutdown succeeded Dec 16 08:16:48 vn17 innd: actived -9 succeeded Dec 16 08:16:49 vn17 xfs: xfs shutdown succeeded . . . 
Dec 16 08:17:10 vn17 crond: crond shutdown succeeded Dec 16 08:17:11 vn17 lpd: lpd shutdown succeeded Dec 16 08:17:13 vn17 kernel: Kernel logging (proc) stopped. Dec 16 08:17:13 vn17 kernel: Kernel log daemon terminating. Dec 16 08:17:14 vn17 syslog: klogd shutdown succeeded Dec 16 08:17:15 vn17 exiting on signal 15 Dec 17 00:18:56 vn17 syslogd 1.3-3: restart. Dec 17 00:18:56 vn17 syslog: syslogd startup succeeded Dec 17 00:18:56 vn17 kernel: klogd 1.3-3, log source = /proc/kmsg started. Dec 17 00:18:56 vn17 kernel: Inspecting /boot/System.map-2.2.13-7mdksmp Dec 17 00:18:56 vn17 syslog: klogd startup succeeded vnNCommand date > /tmp/DATE vnNCommand jj ntpd > /tmp/NTPD Apparently, only vn17's date screwed up, but might be short on a few more ntpd's? PROBABLY NOT grep /usr/local/bin/ntpd /tmp/NTPD | wc 60, but may not have caught some date +%m%d%H%M%Y.%S # SETTING_DATE FROM REMOTE HOST ssh root@vnfe1 date +%m%d%H%M%Y.%S date `ssh root@vnfe1 date +%m%d%H%M%Y.%S` vnallCommand ntptimeset > /tmp/NTPTIME date `!!` [root@vn17]# ruptime | grep vn17 vn17 up ??:??, 0 users, load 0.00, 0.00, 0.00 killall rwhod; rwhod ############################################################ Thu Dec 16 09:52:14 PST 1999 ############################################################ (1) Switching to vnfe[123] for vnHello ############################################################ CRASH_21 ############################################################ Fri Dec 17 14:31:32 PST 1999 vnHello: All appears well on vn36.physics.ubc.ca at Fri Dec 17 14:16:31 PST 1999vnHello: --------------------------------------------------------------------- vnHello: Executing on vn36.physics.ubc.ca at Fri Dec 17 14:30:01 PST 1999 vnHello: Rebooting vn36.physics.ubc.ca at Fri Dec 17 14:31:32 PST 1999 vnHello: --------------------------------------------------------------------- vnHello: Executing on vn36.physics.ubc.ca at Fri Dec 17 14:45:00 PST 1999 # But no trace in log file of receiver hang-up Dec 17 14:28:30 vn36 
sshd[26999]: log: Rhosts with RSA host authentication accepted for root, matt on vnfe1.physics.ubc.ca. Dec 17 14:28:30 vn36 sshd[26999]: log: ROOT LOGIN as 'root' from vnfe1.physics.ubc.ca Dec 17 14:28:30 vn36 sshd[27001]: log: executing remote command as root: cdi; setenv CFLAGS "-O3"; setenv FFLAGS "-O3"; Installz jvs Dec 17 14:28:49 vn36 sshd[26999]: log: Closing connection to 142.103.237.225 Dec 17 14:31:35 vn36 gpm[531]: Error in protocol Dec 17 14:31:38 vn36 innd: innd shutdown succeeded Dec 17 14:31:38 vn36 innd: actived -9 succeeded # Ethan was running 'bubbles' at the time ... ############################################################ Sat Dec 18 15:59:36 PST 1999 (Happy Birthday, Paul!!) SEE README.KERNEL ############################################################ Wed Dec 22 19:07:41 PST 1999 See README.CRASH (CRASH_22 and CRASH_23) vn40, vn45 eth0 lock-ups (Roman B) ############################################################ Wed Dec 22 19:09:55 PST 1999 (1) Kernel update on vn nodes. 
SEE README.KERNEL ############################################################ Thu Dec 23 07:16:21 PST 1999 See README.CRASH (CRASH_24) vn5 NFS (Frans P) ############################################################ Fri Dec 24 08:04:40 PST 1999 (1) Successfully configured, installed and "tested" patched kernel on vn1, modulo NFS problems SEE README.KERNEL ############################################################ Fri Dec 24 11:51:50 PST 1999 ############################################################ (1) New kernel installed on vn1, vn61, vn62, vn63, vn64 vnMakeMPIMachines 1 64 # Try to stress-test with mpptest # Run vn61 <-> vn62 (x 2) # Run vn63 <-> vn64 (x 2) cd /d/vnfe1/home/matt/demo/perftest/ mkdir vn61_1 vn61_2 vn63_1 vn63_2 # As matt@vn61 cd /d/vnfe1/home/matt/demo/perftest/vn61_1 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 16000 cd /d/vnfe1/home/matt/demo/perftest/vn61_2 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 16000 # As matt@vn63 cd /d/vnfe1/home/matt/demo/perftest/vn63_1 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 16000 cd /d/vnfe1/home/matt/demo/perftest/vn63_2 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 16000 # Fri Dec 24 15:05:22 PST 1999 # Completed with no errors # As matt@vn61 cd /d/vnfe1/home/matt/demo/perftest/vn61_1 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 100 cd /d/vnfe1/home/matt/demo/perftest/vn61_2 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 100 # As matt@vn63 cd /d/vnfe1/home/matt/demo/perftest/vn63_1 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 100 cd /d/vnfe1/home/matt/demo/perftest/vn63_2 mpirun -np 2 ../mpptest -gnuplot -rate -size 16000 800000 100 ############################################################ Fri Dec 24 15:12:20 PST 1999 ############################################################ See README.KERNEL vnallCommand vnnewK vnallCommand date # Hangs at vn8, vn8 not pingable from outside world Dec 24 15:48:45 vn8 ifup: 
ifup: IPX = <> Dec 24 15:48:45 vn8 ifup: ifup: IPX = <> Dec 24 15:48:45 vn8 ifup: ifup: CONFIG = <> Dec 24 15:48:45 vn8 ifup: ifup: DEVICE = <> Dec 24 15:48:45 vn8 ifup: SIOCADDRT: Network is unreachable kill vn6-vn8 job # reboot vn8, hope the network comes up sanely # ... nope # Somehow have de-bugging version of /etc/sysconfig/network-scripts on # vn, but not relevant Dec 24 17:50:09 vn8 init: Entering runlevel: 5 Dec 24 17:50:09 vn8 ifup: SIOCADDRT: Network is unreachable Dec 24 17:50:09 vn8 network: Bringing up interface lo succeeded Dec 24 17:50:10 vn8 ifup: SIOCADDRT: Network is unreachable Should have read this fr&*ing file!; NOT a good idea to scp network-scripts from another machine. # AT KINCK, RE-BOOT SINGLE USER, mv /etc/sysconfig/network-scripts.sav # /etc/sysconfig/network-scripts. # Should be able to add route manually, re-install may be best long-term # solution FUTURE_ACTION # FIX vn8 SEE README.CRASH (CRASH_25), patched vn8's rc.local ############################################################ Sun Dec 26 02:45:39 PST 1999 ############################################################ (1) vn35 and vn55 (with new drivers) have rebooted themselves---still have lockup, but now with no error message?? # SEE README.KERNEL, downloaded diagnostic programs (eepro-diag, mii-diag), # installed (but note, have to compile somewhere where kernel source has # been configured, e.g. 
bh6) in ~matt/scripts and incorporated in # vnHELLO watchdog ############################################################ Sun Dec 26 08:36:58 PST 1999 ############################################################ (1) Paring down VNHELLO files # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN cp VNHELLO VNHELLO.991226 cp VNHELLO.ALL VNHELLO.ALL.991226 # As matt@vnfe1 vnNCommand 'cd /var/log; RM messages.vnHello*; ls -lt' # Updated web page ############################################################ Mon Dec 27 07:57:09 PST 1999 ############################################################ (1) vn3 rebooted overnight, no sign of problem from eepro-diag or mii-diag, modifying vnHello to try to restart network # PERUSAL OF LOG SUGGESTS THAT SOME OF THE "HANGS" WITH VNHELLO # MAY BE "FAKE", SHOULD PERHAPS TRY RE-EXECUTE OF SCRIPT? ############################################################ Mon Dec 27 16:11:48 PST 1999 ############################################################ (1) 5 more candidates for rebooting vn18, vn20, vn21, vn22, vn23 Roman B had previously been running 'run_mpi4' on them ssh root@vn18 reboot ssh root@vn20 reboot ssh root@vn21 reboot ssh root@vn22 reboot ssh root@vn23 reboot (2) vn16 has been down for about 30 minutes, not pingable eth0 hang (see README.CRASH CRASH_28) ############################################################ Tue Dec 28 07:26:37 PST 1999 ############################################################ (1) vn21's date is off # As root@vn21 date `ssh root@vnfe1 date +%m%d%H%M%Y.%S` ############################################################ Tue Dec 28 10:31:06 PST 1999 ############################################################ (1) Downloading test versions of PG compilers SEE ALSO matt@laplace:~/papers/Proposals/CFI99/QUOTES/pg # As matt@vnfe1 cd system mkdir vnpg cd vnpg # vi README netscape http://www.pgroup.com/ & # Download PGI Server # May need to install FLEXlm, will certainly have to # if we end up purchasing the 
compiler (2) http://www.globetrotter.com/flexlm/lmgrd.shtml cd system mkdir vnflexlm # Downloaded lmgrd, lmutil for Intel x86, Linux v1.x We do not recommend that lmgrd be run as the root user, since security experts recommended that users and administrators avoid running daemons as root when such daemons do not require root privileges, lmgrd does not require root privileges. To start lmgrd from system startup scripts (/etc/rc files), we recommend that you use the following command to ensure that it runs a su username -c "umask 022; lmgrd -c license.dat -l log" where username is a normal, non-privileged user lmgrd is the path to the lmgrd binary license.dat is the path to the license file log is the path to the lmgrd debug log file cd vnpg # Continue with installation # As root@vnfe[123] mkdir -p /usr/local/pgi setenv PGI /usr/local/pgi cdi mkdir pgi cd /var/tmp/install/pgi tar zxf /d/vnfe1/home/matt/system/vnpg/linux86-HPF-CC.tar.gz ./install #accept #5 (HPF/F90/F77/C/C++) #/usr/local/pgi # y # accept Name: "Matthew William Choptuik" User: matt Email: choptuik@physics.ubc.ca Hostid: PGI=009027E08D47066F388427 FLEXlm hostid: 0090278d4706 Hostname: vnfe3.physics.ubc.ca Installation: /usr/local/pgi PGI Release: 3.1 (3) As matt@vnfe[123] cd /tmp scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/src/tpgi . cd tpgi make (4) Note: lmgrd, lmgrd.rc and lmutil ship with the PG products (5) Documentation: # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mkdir pgi scp -r matt@vnfe1.physics.ubc.ca:/usr/tmp/install/pgi/doc . 
############################################################ Tue Dec 28 15:29:38 PST 1999 ############################################################ (1) More reboots vn17, vn40, vn41, vn42, vn43, vn44 ssh root@vn17 reboot ssh root@vn40 reboot ssh root@vn41 reboot ssh root@vn42 reboot ssh root@vn43 reboot ssh root@vn44 reboot Hi Matt, Thanks for the message, I have cleaned nodes 40 to 44, and I need two more days for the job runing on nodes 45-49 to write down intermediate results. If it possible please make some nodes available for my number crunching (10 nodes will be excellent) All the best Roman vnNCommand date >>> Executing as root@142.103.237.17 Wed Dec 29 07:39:16 PST 1999 >>> Executing as root@142.103.237.40 Tue Dec 28 15:39:17 PST 1999 >>> Executing as root@142.103.237.41 Wed Dec 29 07:39:17 PST 1999 >>> Executing as root@142.103.237.42 Tue Dec 28 15:39:17 PST 1999 >>> Executing as root@142.103.237.43 Tue Dec 28 15:39:18 PST 1999 >>> Executing as root@142.103.237.44 Tue Dec 28 15:39:18 PST 1999 vnNCommand vnSetdate vnNCommand ntptimeset vnNCommand 'uname -a' # Verified new kernels # Needed to kill/restart rwhod on vn17, vn41 (2) Give Roman 40-44, 50-54, need to kill mpptest jobs running on vn50, vn51, vn52, vn53, vn54 foreach n (vn50 vn51 vn52 vn53 vn54) ssh matt@${n} killall mpptest end foreach n (vn50 vn51 vn52 vn53 vn54) jj mpptest end vnMpptest vn17 vn39 & vnMpptest vn17 vn39 1 & ############################################################ Tue Dec 28 17:04:07 PST 1999 ############################################################ (1) Is anacron active on the nodes, if so, what is it doing? # As root@vn7 cat /etc/anacrontab # /etc/anacrontab: configuration file for anacron # See anacron(8) and anacrontab(5) for details. SHELL=/bin/sh PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin # These entries are useful for a Mandrake system. 
1 5 cron.daily run-parts /etc/cron.daily 7 10 cron.weekly run-parts /etc/cron.weekly 7 10 cron.hourly run-parts /etc/cron.hourly 30 15 cron.monthly run-parts /etc/cron.monthly which run-parts /usr/bin/run-parts cd /etc [root@vn7]# ls cron* crontab cron.d: kmod cron.daily: 0anacron* inn-cron-rnews* postfix* tetex.cron* inn-cron-expire* logrotate* slocate.cron* tmpwatch* cron.hourly: 0anacron* inn-cron-nntpsend* cron.monthly: 0anacron* cron.weekly: 0anacron* makewhatis-fr.cron* makewhatis-pl.cron* makewhatis-cs.cron* makewhatis-it.cron* makewhatis-ru.cron* makewhatis-de.cron* makewhatis-ja.cron* makewhatis.cron* makewhatis-es.cron* makewhatis-ko.cron* ############################################################ # HOURLY ############################################################ #!/bin/sh inn-cron-nntpsend* /sbin/chkconfig innd && su - news -c /usr/bin/nntpsend vnallCommand 'chkconfig --list innd' > /tmp/INND >>> Executing as root@142.103.237.225 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.226 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.227 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.1 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.2 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.3 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.4 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.5 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.6 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.7 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.8 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.9 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off . . . 
innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.63 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.64 innd 0:off 1:off 2:off 3:on 4:on 5:on 6:off # Presumably, can safely turn this off vnallbgCommand 'chkconfig innd off' ############################################################ # DAILY ############################################################ [root@vn7]# pwd /etc/cron.daily [root@vn7]# ls 0anacron* inn-cron-rnews* postfix* tetex.cron* inn-cron-expire* logrotate* slocate.cron* tmpwatch* # inn-cron-* should be de-activated # Postfix is mailing system ... isn't sendmail good enough? # QUESTION: How is anacron getting activated, anyway???? # ANSWER: It's not, but 'anacron -u ...' is being executed via # cron scheduled jobs # /etc/crontab SHELL=/bin/bash PATH=/sbin:/bin:/usr/sbin:/usr/bin MAILTO=root HOME=/ # run-parts 01 * * * * root run-parts /etc/cron.hourly 02 4 * * * root run-parts /etc/cron.daily 22 4 * * 0 root run-parts /etc/cron.weekly 42 4 1 * * root run-parts /etc/cron.monthly # run-parts basically executes all the scripts in a specified directory # Interesting construct for detecting extensions # # Don't run [KS]??foo.{rpmsave,rpmorig,rpmnew} scripts # [ "${i%.rpmsave}" != "${i}" ] && continue # [ "${i%.rpmorig}" != "${i}" ] && continue # [ "${i%.rpmnew}" != "${i}" ] && continue # each of /etc/cron.* has '0anacron' script which invokes 'anacron' # e.g. /etc/cron.hourly #!/bin/sh # # anacron's cron script # # This script updates anacron time stamps. It is called through run-parts # either by anacron itself or by cron. # # The script is called "0anacron" to assure that it will be executed # _before_ all other scripts. 
anacron -u cron.hourly ############################################################ Tue Dec 28 18:26:59 PST 1999 ############################################################ (1) Might be an idea to disable gpm vnallCommand 'chkconfig gpm off' ############################################################ Wed Dec 29 10:30:25 PST 1999 ############################################################ (1) Roman B reports difficulty building MPI application with PG compilers, perhaps should install version of MPI with said compilers. (2) Start with version of RNPL using PG compilers ~/scripts/soPG setenv CC 'pgcc' echo "CC = $CC" setenv CXX 'pgCC' echo "CXX = $CXX" setenv CFLAGS '-fast' echo "CFLAGS = $CFLAGS" setenv CPPFLAGS '-DLINUX' echo "CPPFLAGS = $CPPFLAGS" setenv CXXFLAGS ' ' echo "CXXFLAGS = $CXXFLAGS" setenv F77 'pgf77' echo "F77 = $F77" setenv F77FLAGS '-fast' echo "F77FLAGS = $F77FLAGS" setenv F90 'pgf90' echo "F90 = $F90" setenv F90FLAGS '-fast' echo "F90FLAGS = $F90FLAGS" dnl ---------------------------------------------------------------------- dnl LINUX dnl ---------------------------------------------------------------------- LINUX) case "X$F77" in Xpgf77) BBH_SYSTEM=LINUX_PG F77_TRANSFORM="touch"; BBH_DEFS="$BBH_DEFS -DLINUX_PG" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -DCPPFLAGS_UNKNOWN" ;; *) BBH_SYSTEM=LINUX F77_TRANSFORM="touch"; BBH_DEFS="$BBH_DEFS -DLINUX" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -DLINUX -DWant_c_files" CXXFLAGS="$CXXFLAGS -DLINUX -DWant_c_files" ;; esac; # PG compilers detected via setting of F77 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # /usr/src/include/asm does not exist, from whence should it have been # set? 
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% vnallbgCommand 'ln -s /usr/src/linux/include/asm-i386 /usr/src/linux/include/asm' #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # RNPL #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Set flags PG cd rnpl ./configure --prefix=`pwd` make full # works #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # MPI #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Coded vnMPImakePG # As root@vnfe1 vnMPImakePG #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Roman B's program #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # As matt@vnfe1 cd /home/matt/debug/roman cp -r ~roman/SourceMPI/ . PG make # Minor modifications to Makefile NEXT_ACTION # pgf90 chokes on /usr/local/include/mpif.h, but pgf77 is # OK with it cd debug/roman/SourceMPI sd replace '/usr/local/include/mpif.h' 'mpif.h' *.f90 cp /usr/local/include/mpif.h . # Add -Msecond_underscore to FFLAGS Only problem now is '^C' -> !, which Roman already seems to have addressed on zodiac # which, apparently, IS the f77 standard. replace mpif.h mpif.f90 *.f90 # ... and apparently mpif.f90 doesn't get installed by default vnallbgCommand 'cd /usr/local/include; cp ~matt/system/vn/image/master/usr/local/include/mpif.f90 .; ls -lt mpif.f90' vnallCommand 'cat /usr/local/include/mpif.f90' # NO!! mpif.f90 is a module ... 
just vnallCo vnallbgCommand 'cd /usr/local/include; CP ~matt/system/vn/image/master/usr/local/include/mpif.h .; ls -lt mpif.h' vnallCommand 'cat /usr/local/include/mpif.h' replace mpif.f90 mpif.h *.f90 # Install PGI Version of MPI in /usr/local/PGI # modified vnMPImakePG ssh root@vnfe1 vnMPImakePG ssh root@vnfe2 vnMPImakePG ssh root@vnfe3 vnMPImakePG cd /var/tmp/install/PG/mpich make install cds cp vnMakeMPIMachines vnMakeMPIPGIMachines # Modified mx vnMakeMPIPGIMachines vnMakeMPIPGIMachines 1 64 # MPI/PGI seems to work ############################################################ Wed Dec 29 12:28:11 PST 1999 ############################################################ (1) vn64 has problems Dec 29 10:04:18 vn64 sshd[27985]: log: Closing connection to 142.103.237.225 Dec 29 10:12:04 vn64 sshd[28008]: log: Connection from 142.103.237.225 port 1004Dec 29 10:12:05 vn64 sshd[28008]: log: Rhosts with RSA host authentication accepted for matt, matt on vnfe1.physics.ubc.ca. Dec 29 10:12:05 vn64 sshd[28010]: log: executing remote command as user matt Dec 29 10:12:07 vn64 sshd[28008]: log: Closing connection to 142.103.237.225 Dec 29 10:16:52 vn64 modprobe: can't locate module lo:0 Dec 29 10:16:52 vn64 modprobe: can't locate module lo:1 Dec 29 10:16:53 vn64 modprobe: can't locate module lo:2 Dec 29 10:16:53 vn64 modprobe: can't locate module lo:3 Dec 29 10:16:53 vn64 modprobe: can't locate module lo:4 Dec 29 10:16:53 vn64 modprobe: can't locate module lo:5 Dec 29 10:16:53 vn64 modprobe: can't locate module lo:6 # Killed mpp jobs and did a manual restart of the network---seems to have fixed problem, # need to keep working on vnHello vnMpptest vn63 vn64 & vnMpptest vn63 vn64 1 & # Nope ... 
looks like a re-boot is in order # Done ############################################################ Wed Dec 29 13:33:32 PST 1999 ############################################################ (1) setenv MANPATH "$PGI/man" clobbers default paths # Use setenv MANPATH "`manpath`:$PGI/man" ############################################################ Wed Dec 29 21:52:39 PST 1999 ############################################################ (1) vnfe1, vnfe2 only have one processor each recognized, will need to check on-site Thu Dec 30 07:41:50 PST 1999 # Disabling vnHELLO vnNCommand 'crontab /d/vnfe1/home/matt/system/vn/image/master/crontabs/the1' vnNCommand 'crontab -l' Thu Dec 30 09:21:25 PST 1999 # NOTE THAT IT IS PROCESSOR 0, NOT 1 WHICH IS BEING PICKED UP ON # vnfe1, vnfe2 #--------------------------------------------------------------------- !!ssh root@vnfe1 cat /proc/cpuinfo #--------------------------------------------------------------------- processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.699147 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 3 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 psn mmx osfxsr kni bogomips : 445.64 #--------------------------------------------------------------------- !!ssh root@vnfe2 cat /proc/cpuinfo #--------------------------------------------------------------------- processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.697000 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 3 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 psn mmx osfxsr kni bogomips : 445.64 #--------------------------------------------------------------------- !!ssh 
root@vnfe3 cat /proc/cpuinfo #--------------------------------------------------------------------- processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.699427 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr kni bogomips : 447.28 processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.699427 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr kni bogomips : 445.64 Thu Dec 30 09:21:53 PST 1999 Rebooting vnfe1 # FOUND PERTINENT BIOS SETTING IN PROCESSOR SET-UP (OPTION GETS SET TO # YES TO PROBE THE HARDWARE AT NEXT REBOOT) SEEMS TO HAVE DONE THE TRICK #--------------------------------------------------------------------- !!ssh root@vnfe1 cat /proc/cpuinfo #--------------------------------------------------------------------- processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.695367 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr kni bogomips : 447.28 processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.695367 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx osfxsr kni 
bogomips : 445.64 # Send message to Roman B asking him to vacate vnfe2 at some point FUTURE_ACTION # CHANGE vnfe2's BIOS (F2 ON REBOOT) TO PROBE HARDWARE # As root@vnfe1 rnpl_bench_wave2d # Updated matt@laplace.physics.ubc.ca:~/system/BENCHMARKS/vn # excerpted here ... vn1 (2) vnfe1 (2) 100.7/3.9 (1:46) 99.0% 100.8/3.3 (1:51) 93% 103.2/2.1 (1.47) 98.4% 102.0/2.6 (1:52) 93% NEXT_ACTION mpptest on available nodes except the 30's ############################################################ Thu Dec 30 10:27:26 PST 1999 ############################################################ (1) Archiving Rtop # As matt@laplace cd system mkdir vnArchive cd vn cp -r Rtop ../vnArchive/Rtop.1999:12:30:1032 ############################################################ Thu Dec 30 10:54:45 PST 1999 ############################################################ (1) Started up mpptest on idle nodes, Kendal now running 'JED' on 1 16 17 18 20 21 22 ############################################################ Sat Jan 1 16:24:40 PST 2000 ############################################################ (1) Roman P cleared off his nodes: rebooting ssh root@vn10 reboot ssh root@vn11 reboot ssh root@vn12 reboot ssh root@vn13 reboot ssh root@vn14 reboot ssh root@vn15 reboot ssh root@vn19 reboot ssh root@vn2 reboot ssh root@vn27 reboot ssh root@vn28 reboot ssh root@vn29 reboot ssh root@vn30 reboot ssh root@vn31 reboot ssh root@vn32 reboot ssh root@vn37 reboot ssh root@vn7 reboot ssh root@vn4 reboot # Didn't come back SEE README.CRASH (CRASH_31) vn10: Sat Jan 1 17:36:08 PST 2000 # vn11: Sat Jan 1 16:36:08 PST 2000 vn12: Sat Jan 1 16:36:09 PST 2000 vn13: Sat Jan 1 16:36:09 PST 2000 vn14: Sat Jan 1 16:36:09 PST 2000 vn15: Sat Jan 1 16:36:10 PST 2000 vn19: Sun Jan 2 08:36:10 PST 2000 # vn2: Sat Jan 1 16:36:10 PST 2000 vn27: Sat Jan 1 16:36:10 PST 2000 vn28: Sat Jan 1 16:36:11 PST 2000 vn29: Sat Jan 1 08:36:11 PST 2000 # vn30: Sat Jan 1 16:36:12 PST 2000 vn31: Sat Jan 1 18:36:12 PST 2000 # vn32: Sun Jan 2 
08:36:12 PST 2000 # vn37: Sat Jan 1 16:36:12 PST 2000 vn7: Sat Jan 1 16:36:13 PST 2000 ############################################################ Sat Jan 1 20:29:31 PST 2000 ############################################################ (1) Trying to fix vn4's syslog problem [root@vn4]# chkconfig --list | grep log syslog 0:off 1:off 2:on 3:off 4:on 5:off 6:off [root@vn4]# chkconfig syslog on [root@vn4]# chkconfig --list | grep log syslog 0:off 1:off 2:on 3:on 4:on 5:on 6:off ############################################################ Sun Jan 2 18:16:30 PST 2000 ############################################################ (1) Last of the reboots to the new kernel ssh root@vn45 reboot ssh root@vn46 reboot ssh root@vn47 reboot ssh root@vn48 reboot ssh root@vn49 reboot foreach m (vn45 vn46 vn47 vn48 vn49) ssh root@${m} Date end foreach m (vn45 vn46 vn47 vn48 vn49) ssh root@${m} uname -a end vnNCommand 'uname -a' > /tmp/UNAME >>> Executing as root@142.103.237.1 Linux vn1.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.2 Linux vn2.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.3 Linux vn3.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.4 Linux vn4.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.5 Linux vn5.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.6 Linux vn6.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.7 Linux vn7.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.8 Linux vn8.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.9 Linux 
vn9.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.10 Linux vn10.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.11 Linux vn11.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.12 Linux vn12.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.13 Linux vn13.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.14 Linux vn14.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.15 Linux vn15.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.16 Linux vn16.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.17 Linux vn17.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.18 Linux vn18.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.19 Linux vn19.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.20 Linux vn20.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.21 Linux vn21.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.22 Linux vn22.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.23 Linux vn23.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.24 Linux vn24.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 
unknown >>> Executing as root@142.103.237.25 Linux vn25.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.26 Linux vn26.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.27 Linux vn27.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.28 Linux vn28.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.29 Linux vn29.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.30 Linux vn30.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.31 Linux vn31.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.32 Linux vn32.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.33 Linux vn33.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.34 Linux vn34.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.35 Linux vn35.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.36 Linux vn36.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.37 Linux vn37.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.38 Linux vn38.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.39 Linux vn39.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.40 Linux vn40.physics.ubc.ca 
2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.41 Linux vn41.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.42 Linux vn42.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.43 Linux vn43.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.44 Linux vn44.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.45 Linux vn45.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.46 Linux vn46.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.47 Linux vn47.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.48 Linux vn48.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.49 Linux vn49.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.50 Linux vn50.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.51 Linux vn51.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.52 Linux vn52.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.53 Linux vn53.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.54 Linux vn54.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.55 Linux vn55.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing 
as root@142.103.237.56 Linux vn56.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.57 Linux vn57.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.58 Linux vn58.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.59 Linux vn59.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.60 Linux vn60.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.61 Linux vn61.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.62 Linux vn62.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.63 Linux vn63.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown >>> Executing as root@142.103.237.64 Linux vn64.physics.ubc.ca 2.2.13-7Pmdksmp #1 SMP Fri Dec 24 10:04:34 PST 1999 i686 unknown ############################################################ CRASH_32 ############################################################ Mon Jan 3 07:43:57 PST 2000 (1) Mijan managed to hang vn1 pretty easily> FUTURE_ACTION Is this a sshd problem? 
############################################################ Mon Jan 3 07:54:49 PST 2000 ############################################################ (1) Temporarily shutting down mpptest's vnallCommand 'killall vnMpptest' vnallCommand 'killall mpptest' vnallCommand 'jj mpptest' ############################################################ Mon Jan 3 08:11:29 PST 2000 ############################################################ (1) Trying mijan's job cd /d/vnfe1/home/matt/debug/mijan/n161run/ # pgfile vn1 0 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn2 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn3 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn4 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn5 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn6 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn7 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn8 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn9 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn10 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn11 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn12 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn13 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn14 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn15 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par vn16 1 /d/vnfe1/home/matt/debug/mijan/n161run/agave run.par # As matt@vn1 cd /d/vnfe1/home/matt/debug/mijan/n161run mpirun -p4pg pgfile agave superbbh161.par # Doesn't work mpirun -p4pg pgfile agave ############################################################ Mon Jan 3 09:38:36 PST 2000 ############################################################ (1) vnfe2 down, mijan apparent culprit, will take this chance to reboot to attempt to fix processor non-detection see README.CRASH (CRASH_33) vnfe2 now recognizes both processors processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 7 model name 
: Pentium III (Katmai) stepping : 3 cpu MHz : 447.700780 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 3 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 psn mmx osfxsr kni bogomips : 447.28 processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 447.700780 cache size : 512 KB fdiv_bug : no hlt_bug : no sep_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 3 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 psn mmx osfxsr kni bogomips : 445.64 ############################################################ Tue Jan 11 12:44:48 PST 2000 ############################################################ (1) Adding Ming (Henry) He nu vi minghe vnNewUsers # changed encrypted passwd vnDistEtc shadow minghe:$1$6ZUzxNa/$j.opg92/1I3bklY1K81u20:10969:0:99999:7:::135460352 ############################################################ Wed Jan 19 09:13:39 PST 2000 ############################################################ (1) Jason has updated PG licenses, need to recompile MPI etc # As matt@vnfe1 # Building RNPL with PG compilers in ~/autoconf # Modified 'mfi' so that LINUX_PG is synonymous with LINUX # 'f77' examples don't compile because RNPL isn't using 'double # underscore' convention with LINUX_PG, modified aclocal.m4 # macro BBH_INSTALL_GENF77EXTERN LINUX|LINUX_PG) if test -f src/genf77extern_linux.c; then cp src/genf77extern_linux.c src/genf77extern.c AC_MSG_RESULT(src/genf77extern_linux.c -> src/genf77extern.c) else AC_MSG_RESULT(Warning: src/genf77extern_linux.c not found) fi ;; # OK (2) Rebuilding MPI ssh root@vnfe1 vnMPImakePG ssh root@vnfe2 vnMPImakePG ssh root@vnfe3 vnMPImakePG ssh root@vnfe1 'cd /var/tmp/install/PG/mpich; make install' ssh root@vnfe2 'cd /var/tmp/install/PG/mpich; make install' ssh root@vnfe3 'cd 
/var/tmp/install/PG/mpich; make install' vnMakeMPIPGIMachines 1 64 # OK ############################################################ Wed Jan 19 12:19:46 PST 2000 ############################################################ (1) vn12 down (see CRASH_34) ############################################################ Tue Jan 25 12:00:32 PST 2000 ############################################################ (1) New Account for Valdimir Murashov Subject: request for vn account (fwd) password he sent: 8sHkS.H/U9bEk Cc: Vladimir Murashov 1) Full Name: Vladimir Murashov 2) Preferred Login Name: murashov 3) Alternate Login Name (if preferred unavailable/not allowed): vmur 4) Group: Patey 5) Preferred e-mail: murashov@chem.ubc.ca 6) Contact Phone Number: 822-6262 7) Preferred Shell: csh 8) Title: post-doc ############################################################ Fri Jan 28 07:35:35 PST 2000 ############################################################ (1) New Account for Suresh Pillai Encrypted password: pBPH3zoI758Uw From pillai@physics.ubc.ca Thu Jan 27 17:30:30 2000 1) Full Name: Suresh Pillai 2) Preferred Login Name: pillai 3) Alternate Login Name (if preferred unavailable/not allowed): suresh 4) Group (see notes below): Other 5) Preferred e-mail: pillai@physics.ubc.ca 6) Contact Phone Number: 822-1393 7) Preferred Shell (see notes below): tcsh 8) Title (Faculty, post-doc, grad student, undergrad ...) and brief description of anticpated usage of cluster Grad Student with Prof Bergersen. Running c++ simulations of integrate and fire models. These do not require a cluster structure, yet I have many to run and so would like to run them at the same time. I will move the outputs to theory.physics if required to do so. 
############################################################ Sun Jan 30 04:27:52 PST 2000 ############################################################ (1) Playing with bbh_grace vnallCommand vnallbgCommand # vnallN -> vntestN (1, 16, 25, 27) vnallbgCommand 'cdi; Install bbh_grace' As root@{godel,laplace} cd /var/tmp/install; zcat ~matt/autoconf/bbh_grace.tar.Z | tar xf - : cd bbh_grace; ./configure --prefix=`pwd` ############################################################ Sun Jan 30 05:25:07 PST 2000 ############################################################ (1) Was running 'GRACE/wave2d' on 1, 15, 25, 27 from vn1, and apparently have hung vn27 See README.CRASH (CRASH_35) Continue experiment with 1, 15, 16, 17 see README.GRACE ############################################################ Sun Jan 30 19:58:57 PST 2000 ############################################################ (1) "Experimentation" has left cluster in pretty bad shape vis a vis defunct processes vnallCommand 'jj defunct | grep -v jj' > /tmp/DEFUNCT >>> Executing as root@142.103.237.225 >>> Executing as root@142.103.237.226 >>> Executing as root@142.103.237.227 >>> Executing as root@142.103.237.1 104 Z lothar 12626 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12641 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12656 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12657 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12658 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12659 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12660 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12661 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12662 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12663 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12664 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12665 12624 0 60 0 - 0 
exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12666 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12667 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12668 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12669 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12670 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12671 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12672 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] 104 Z lothar 12673 12624 0 60 0 - 0 exit_n Jan16 pts/0 00:00:00 [rsh ] >>> Executing as root@142.103.237.2 >>> Executing as root@142.103.237.3 >>> Executing as root@142.103.237.4 >>> Executing as root@142.103.237.5 >>> Executing as root@142.103.237.6 >>> Executing as root@142.103.237.7 >>> Executing as root@142.103.237.8 544 Z root 321 1 0 60 0 - 0 exit_n 1999 ? 00:00:04 [klogd ] 544 Z root 366 1 0 60 0 - 0 exit_n 1999 ? 00:00:04 [inetd ] 444 Z xfs 573 1 0 60 0 - 0 exit_n 1999 ? 00:00:05 [xfs ] 444 Z news 621 1 0 60 0 - 0 exit_n 1999 ? 00:00:08 [innd ] 504 Z root 670 1 0 60 0 - 0 exit_n 1999 ? 00:00:05 [prefdm ] 504 Z root 674 1 0 60 0 - 0 exit_n 1999 ? 00:07:44 [X ] 444 Z root 675 1 0 60 0 - 0 exit_n 1999 ? 00:09:56 [prefdm ] 504 Z mijan 19140 1 0 60 0 - 0 exit_n 1999 ? 00:00:00 [tcsh ] 444 Z mijan 19153 1 0 60 0 - 0 exit_n 1999 ? 00:00:00 [agave ] 404 Z daub 6001 1 2 60 0 - 0 exit_n Jan09 ? 01:45:45 [mcmuvt.v96 ] 504 Z root 16035 1 0 60 0 - 0 exit_n Jan12 ? 00:00:04 [in.rshd ] 104 Z matt 16036 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z matt 16048 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [wave ] 044 Z matt 16062 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z matt 16078 1 0 60 0 - 0 exit_n Jan12 pts/0 00:00:00 [tcsh ] 044 Z root 16098 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z matt 16191 1 0 60 0 - 0 exit_n Jan12 pts/1 00:00:00 [tcsh ] 044 Z root 16237 1 0 60 0 - 0 exit_n Jan12 ? 
00:00:00 [tcsh ] 044 Z root 16271 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z root 16314 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z root 16327 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [anacron ] 044 Z root 16341 1 0 60 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] 044 Z root 16363 1 0 65 0 - 0 exit_n Jan12 ? 00:00:00 [tcsh ] >>> Executing as root@142.103.237.9 >>> Executing as root@142.103.237.10 >>> Executing as root@142.103.237.11 >>> Executing as root@142.103.237.12 >>> Executing as root@142.103.237.13 >>> Executing as root@142.103.237.14 >>> Executing as root@142.103.237.15 >>> Executing as root@142.103.237.16 >>> Executing as root@142.103.237.17 >>> Executing as root@142.103.237.18 >>> Executing as root@142.103.237.19 >>> Executing as root@142.103.237.20 >>> Executing as root@142.103.237.21 >>> Executing as root@142.103.237.22 >>> Executing as root@142.103.237.23 >>> Executing as root@142.103.237.24 >>> Executing as root@142.103.237.25 >>> Executing as root@142.103.237.26 >>> Executing as root@142.103.237.27 >>> Executing as root@142.103.237.28 >>> Executing as root@142.103.237.29 >>> Executing as root@142.103.237.30 >>> Executing as root@142.103.237.31 >>> Executing as root@142.103.237.32 >>> Executing as root@142.103.237.33 >>> Executing as root@142.103.237.34 104 Z murashov 28166 28150 0 60 0 - 0 exit_n Jan11 ? 00:00:00 [rsh ] 104 Z murashov 28168 28150 0 60 0 - 0 exit_n Jan11 ? 00:00:00 [rsh ] 104 Z murashov 28169 28150 0 60 0 - 0 exit_n Jan11 ? 00:00:00 [rsh ] 104 Z murashov 28170 28150 0 60 0 - 0 exit_n Jan11 ? 00:00:00 [rsh ] 104 Z murashov 28171 28150 0 60 0 - 0 exit_n Jan11 ? 00:00:00 [rsh ] 104 Z murashov 28173 28150 0 60 0 - 0 exit_n Jan11 ? 
00:00:00 [rsh ] >>> Executing as root@142.103.237.35 >>> Executing as root@142.103.237.36 >>> Executing as root@142.103.237.37 >>> Executing as root@142.103.237.38 >>> Executing as root@142.103.237.39 >>> Executing as root@142.103.237.40 >>> Executing as root@142.103.237.41 >>> Executing as root@142.103.237.42 >>> Executing as root@142.103.237.43 >>> Executing as root@142.103.237.44 >>> Executing as root@142.103.237.45 >>> Executing as root@142.103.237.46 >>> Executing as root@142.103.237.47 >>> Executing as root@142.103.237.48 >>> Executing as root@142.103.237.49 >>> Executing as root@142.103.237.50 >>> Executing as root@142.103.237.51 >>> Executing as root@142.103.237.52 >>> Executing as root@142.103.237.53 >>> Executing as root@142.103.237.54 >>> Executing as root@142.103.237.55 >>> Executing as root@142.103.237.56 >>> Executing as root@142.103.237.57 >>> Executing as root@142.103.237.58 >>> Executing as root@142.103.237.59 >>> Executing as root@142.103.237.60 >>> Executing as root@142.103.237.61 >>> Executing as root@142.103.237.62 >>> Executing as root@142.103.237.63 >>> Executing as root@142.103.237.64 # Need rebooting # vn1, vn8, vn34 # But vn1, vn34 are being used ssh root@vn8 # Looks like vn8 will need manual intervention, problems may be # due to improper MPI clean-up?? 
# Note that vn8 is the node that needs explicit 'route add default' # in /etc/rc.d/rc.local see README.CRASH (CRASH_38) ############################################################ Mon Jan 31 08:51:15 PST 2000 ############################################################ (1) vnswitch Manager Level Facilities for vnfe2 142.103.237.226 vnfe3 142.103.237.227 laplace 142.103.234.31 godel 142.103.234.22 bh1 142.103.235.51 rar0502 142.103.175.48 bh2 142.103.235.52 bh3 142.103.235.53 bh4 142.103.235.54 bh5 142.103.235.55 bh6 142.103.235.56 ############################################################ Mon Jan 31 15:07:28 PST 2000 ############################################################ (1) vnfe3 filled up 9127952 suqin 4607870 atsci 466760 wkb 107218 xiao 75698 tshannon 67057 bian 31984 shariati 6845 httpd 2454 matt 1692 ftp ############################################################ Mon Jan 31 15:19:05 PST 2000 ############################################################ (1) vn36 hung up (see README.CRASH (CRASH_37)) ############################################################ Mon Jan 31 20:05:21 PST 2000 ############################################################ (1) Coded matt@vnfe1:~/scripts/vnPigs to generate usage summary by user and export to .../Doc/VN/USERS Added to root@vnfe1's crons ssh root@vnfe1 crontab -e # Get node status (load factors) and export to laplace.physics.ubc.ca Web pages 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnStatus ; /d/vnfe1/home/matt/scripts/vnPigs ############################################################ Tue Feb 1 15:15:15 PST 2000 ############################################################ (1) vnswitch Manager Level Facilities for dsl105.net.ubc.ca: 142.103.175.105 (Obviously, it's going to change again! Should demand credit!! 
1 hour already) ############################################################ Fri Feb 4 11:00:58 PST 2000 ############################################################ (1) Lothar reports MPI problems with cluster, and preliminary work with Mijan (as well as experience over the past two days) suggests that some nodes may need to be rebooted. vn49 vn50 OK vn51 OK vn52 OK vn53 BAD REBOOT OK vn54 BAD REBOOT OK vn55 OK vn56 vn57 vn58 vn59 vn60 vn61 vn62 vn63 vn64 # Trying agave on 32 processors # As mijan@vn64 cd /d/vnfe1/home/mijan/agaverun time mpirun -np 32 -machinefile mfile agave flat33.par 17.200u 9.060s 2:35.84 16.8% 0+0k 0+0io 33495pf+0w time mpirun -np 32 -machinefile mfile agave flat65.par (18.1% mem) 77.670u 59.930s 7:28.99 30.6% 0+0k 0+0io 33472pf+0w time mpirun -np 64 -machinefile mfile agave flat129.par (26.7% mem) 252.560u 145.090s 28:33.83 23.2% 0+0k 0+0io 63884pf+0w time mpirun -np 64 -machinefile mfile agave flat161.par () 460.760u 367.320s 48:24.52 28.5% 0+0k 0+0io 63885pf+0w ############################################################ Sat Feb 5 18:52:09 PST 2000 ############################################################ (1) Installed svs vnallCommand 'cdi; ls Install' vnallCommand 'cdi; setenv CC cc; unsetenv CFLAGS; setenv PREFIX /usr/local; setenv MAKE on; Install svs;' vnallCommand 'cd /usr/local/lib; strings libsvs.a' ############################################################ Mon Feb 7 11:36:38 PST 2000 ############################################################ (1) Kendal needs NETCDF library compiled with PG compilers. Good way to test is to install HDF, then RNPL and use .hdf output. Default installation for freeware etc. compiled with PG compilers /usr/local/PGI matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/autoconf/HDF4.1r3.tar.gz # As matt@vnfe1 scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/autoconf/HDF4.1r3.tar.gz . 
# As root@vnfe1 # Scripting isn't going to work, apparently need to hand-mung files cdi; cd PGI; gunzip -c ~matt/autoconf/HDF4.1r3.tar.gz | tar xf - cd HDF4.1r3 configure # Configuring for a i686-unknown-linux host cd config bu mh-linux # Modified mh-linux and backed up to # ~matt/system/vn/image/master/install/PGI/HDF4.1r3/config cd .. ./configure -v --prefix=/usr/local/PGI make make install # ls -ltR /usr/local/PGI # As root@vnfe[23] cdi; cd PGI; gunzip -c ~matt/autoconf/HDF4.1r3.tar.gz | tar xf - cd HDF4.1r3/config/ cp ~matt/system/vn/image/master/install/PGI/HDF4.1r3/config/mh-linux . cd .. ./configure -v --prefix=/usr/local/PGI make make install ############################################################ Tue Feb 8 10:04:04 PST 2000 ############################################################ (1) Luis having problems with Cactus/MPI? rm_l_3_12364: p4_error: interrupt SIGINT: 2 p0_25479: p4_error: interrupt SIGSEGV: 11 bm_list_25480: p4_error: interrupt SIGINT: 2 rm_l_7_11841: p4_error: interrupt SIGINT: 2 rm_l_6_7320: p4_error: interrupt SIGINT: 2 rm_l_1_22834: p4_error: interrupt SIGINT: 2 rm_l_4_5712: p4_error: interrupt SIGINT: 2 rm_l_2_17233: p4_error: interrupt SIGINT: 2 rm_l_5_31133: p4_error: interrupt SIGINT: 2 I asked for 8 procs, so I run on (I guess) vnfe1 vn1 vn10 vn16 vn18 vn27 vn28 vn3 # fpi works OK on those machines # As does rnpl/wave2d, so who knows, suggest that Mijan and Luis # let me run the thing vnallCommand jj cactus > /tmp/CACTUS ############################################################ Thu Feb 10 12:21:19 PST 2000 ############################################################ (1) Rebooted vn1 this am after alert from Lothar, see README.CRASH (CRASH_39) (2) Seeing what I can do about Kendal's NETCDF prob. 
/home/matt/debug/wkb/pelican # Will try the PG debugger pgdbg burn run burn write(6,*) 'gets here' call NCENDF(ncid, rcode) write(6,*) 'does not get here' # Need to get/install NETCDF documentation # Done ############################################################ Sat Feb 12 15:19:10 PST 2000 ############################################################ (1) Kendal installed netcdf-3.4 /d/vnfe3/home/wkb/netcdf/netcdf-3.4 and apparently that's working for him. ############################################################ Fri Feb 11 19:21:22 PST 2000 ############################################################ (1) After complaints from Frans and Inaki about memory problems on vn37, vn57 and vn58, reboot vn57 SEE README.CRASH (CRASH_40) Feb 10 16:09:16 vn57 sshd[17823]: log: executing remote command as user matt Feb 10 16:09:19 vn57 sshd[17821]: log: Closing connection to 142.103.237.225 Feb 10 16:17:11 vn57 sshd[17847]: log: Connection from 142.103.237.225 port 1002 Feb 10 16:17:11 vn57 sshd[17847]: log: Rhosts with RSA host authentication accepted for matt, matt on vnfe1.physics.ubc.ca. Feb 10 16:17:12 vn57 sshd[17849]: log: executing remote command as user matt Feb 10 16:17:13 vn57 kernel: Unable to load interpreter Feb 10 16:17:13 vn57 last message repeated 2 times Feb 10 16:17:14 vn57 sshd[17847]: log: Closing connection to 142.103.237.225 Feb 10 16:25:00 vn57 sshd[17868]: log: Connection from 142.103.237.225 port 986 Feb 10 16:25:00 vn57 sshd[17868]: log: Rhosts with RSA host authentication accepted for matt, matt on vnfe1.physics.ubc.ca. . . . ############################################################ CRASH_41, CRASH_42, CRASH_43, CRASH_44 ############################################################ vn.physics.ubc.ca Compute Node Status: Sat Feb 12 13:30:00 PST 2000 The following nodes are down: 1: vn32 down 0:58 2: vn35 down 1:00 3: vn56 down 1:23 vn39, vn27 and vn11 seem to have some kind of problem also because I cannot run my program neither. 
I think there is no enough free memory on them. Rebooted vn39 Check Rtop logs from 1200-1400 Last log is 2000:02:12:1159.59 (script hung up thereafter) vn.physics.ubc.ca usage: Sat Feb 12 12:03:12 PST 2000 NODE PID USER PRI NI SIZE RSS SHARE STAT LIB %CPU %MEM TIME COMMAND 1: vn64 10833 inaki 12 0 454M 454M 552 R 0 50.2 90.0 10:37 vlasov 2: vn58 26033 inaki 12 0 260M 260M 552 R 0 50.2 51.5 6:55 vlasov 3: vn27 18913 suqin 19 0 24560 23M 580 R 0 50.1 4.7 295:39 a.out 4: vn59 27070 inaki 12 0 260M 260M 552 R 0 50.0 51.4 7:18 vlasov 5: vn17 21978 suqin 20 0 24568 23M 576 R 0 50.0 4.7 1:53 a.out 6: vn31 23998 suqin 14 0 24564 23M 576 R 0 49.9 4.7 11:43 a.out 7: vn63 5233 inaki 12 0 390M 390M 552 R 0 49.8 77.1 10:24 vlasov 8: vn35 15076 fransp 12 0 3188 3188 768 R 0 49.8 0.6 417:00 ads 9: vn1 13350 suqin 10 0 24548 23M 580 R 0 49.8 4.7 296:30 a.out 10: vn10 8326 daub 19 0 2324 2324 464 R 0 49.7 0.4 739:20 mcmuvt.v96 11: vn12 28204 roman 15 0 3896 3896 984 R 0 49.6 0.7 7300m Run_mpi2 12: vn8 20448 daub 17 0 2236 2236 464 R 0 49.4 0.4 746:45 mcmuvt.v96 13: vn51 27245 suqin 11 0 24640 24M 576 R 0 48.9 4.7 0:18 a.out 14: vn53 19674 fransp 14 0 3188 3188 768 R 0 48.8 0.6 179:06 ads 15: vn57 1985 suqin 20 0 24548 23M 580 R 0 48.7 4.7 298:15 a.out 16: vn57 1957 suqin 19 0 24508 23M 580 R 0 48.7 4.7 299:51 a.out 17: vn54 19689 fransp 20 0 3188 3188 768 R 0 48.7 0.6 174:51 ads 18: vn28 31098 suqin 18 0 24588 24M 576 R 0 48.6 4.7 3:36 a.out 19: vn51 27249 suqin 10 0 12684 12M 2092 R 0 48.5 2.4 0:09 f771 20: vn15 30164 fransp 15 0 3188 3188 768 R 0 48.5 0.6 396:19 ads 21: vn52 25926 fransp 10 0 3188 3188 768 R 0 48.3 0.6 174:51 ads 22: vn37 25420 fransp 16 0 3188 3188 768 R 0 48.2 0.6 159:56 ads 23: vn36 7212 suqin 20 0 24588 24M 580 R 0 48.1 4.7 292:38 a.out 24: vn9 31927 daub 16 0 2236 2236 464 R 0 48.0 0.4 3608m mcmuvt.v96 25: vn25 7673 fransp 20 0 3188 3188 768 R 0 48.0 0.6 406:51 ads 26: vn7 19732 daub 14 0 2056 2056 464 R 0 47.9 0.3 745:47 mcmuvt.v96 27: vn52 25937 fransp 10 
0 3188 3188 768 R 0 47.8 0.6 174:39 ads 28: vn11 3257 suqin 15 0 24320 23M 580 R 0 47.8 4.7 5:19 a.out 29: vn36 9480 suqin 20 0 24564 23M 576 R 0 47.7 4.7 10:01 a.out 30: vn48 29509 roman 15 0 8932 8932 872 S 0 47.6 1.7 9016m Run_mpi2 31: vn61 3283 suqin 16 0 24560 23M 580 R 0 47.5 4.7 297:00 a.out 32: vn6 14002 daub 16 0 2332 2332 464 R 0 47.4 0.4 2194m mcmuvt.v96 33: vn42 7724 roman 16 0 8940 8940 872 R 0 47.4 1.7 18821m Run_mpi4 34: vn29 23862 suqin 15 0 24484 23M 580 R 0 47.4 4.7 302:09 a.out 35: vn55 24243 fransp 15 0 3188 3188 768 R 0 47.3 0.6 176:08 ads 36: vn10 9116 matt 18 0 21928 21M 920 R 0 47.3 4.2 662:02 emkgcnad 37: vn53 19656 fransp 12 0 3188 3188 768 R 0 47.2 0.6 179:25 ads 38: vn2 30164 fransp 15 0 3188 3188 768 R 0 47.2 0.6 158:57 ads 39: vn60 24485 suqin 16 0 24508 23M 580 R 0 46.9 4.7 299:25 a.out 40: vn47 29570 roman 14 0 8932 8932 872 R 0 46.9 1.7 8999m Run_mpi2 41: vn38 29015 suqin 17 0 24504 23M 580 R 0 46.9 4.7 302:32 a.out 42: vn21 22372 roman 18 0 8940 8940 872 S 0 46.7 1.7 11196m Run_mpi4 43: vn32 17170 suqin 18 0 24396 23M 580 R 0 46.6 4.7 312:52 a.out 44: vn3 13048 daub 16 0 2428 2428 464 R 0 46.6 0.4 2865m mcmuvt.v96 45: vn23 21916 roman 17 0 8940 8940 872 R 0 46.6 1.7 11285m Run_mpi4 46: vn40 11237 roman 20 0 8452 8340 348 R 0 46.5 1.6 18880m Run_mpi4 47: vn33 27965 suqin 20 0 24384 23M 580 R 0 46.4 4.7 313:08 a.out 48: vn54 19671 fransp 18 0 3188 3188 768 R 0 46.3 0.6 176:01 ads 49: vn5 24406 daub 14 0 2208 2208 464 R 0 46.3 0.4 644:08 mcmuvt.v96 50: vn8 20441 daub 13 0 2032 2032 464 R 0 46.1 0.3 747:07 mcmuvt.v96 51: vn50 25568 suqin 19 0 24604 24M 576 R 0 45.8 4.7 0:52 a.out 52: vn49 29489 roman 12 0 8932 8932 872 R 0 45.8 1.7 9027m Run_mpi2 53: vn44 7663 roman 13 0 8940 8940 872 R 0 45.8 1.7 18863m Run_mpi4 54: vn4 23592 daub 10 0 2236 2236 464 R 0 45.7 0.4 748:12 mcmuvt.v96 55: vn22 21441 roman 14 0 8940 8940 872 R 0 45.7 1.7 11231m Run_mpi4 56: vn19 13146 roman 10 0 8932 8932 872 R 0 45.7 1.7 7629m Run_mpi2 57: vn19 13126 roman 
10 0 8960 8960 876 R 0 45.7 1.7 7608m Run_mpi2 58: vn46 29764 roman 11 0 8932 8932 872 R 0 45.6 1.7 8997m Run_mpi2 59: vn43 7665 roman 13 0 8940 8940 872 R 0 45.6 1.7 18855m Run_mpi4 60: vn13 26272 roman 15 0 8960 8960 876 R 0 45.6 1.7 7572m Run_mpi2 61: vn30 26302 suqin 20 0 24376 23M 580 R 0 45.5 4.7 8:34 a.out 62: vn62 2850 suqin 14 0 24592 24M 580 R 0 45.4 4.7 294:54 a.out 63: vn44 7643 roman 12 0 8964 8964 876 R 0 45.4 1.7 18843m Run_mpi4 64: vn14 30846 roman 11 0 8932 8932 872 R 0 45.4 1.7 7601m Run_mpi2 65: vn45 9436 suqin 10 0 24460 23M 580 R 0 45.3 4.7 307:55 a.out 66: vn45 9405 suqin 12 0 24464 23M 580 R 0 45.3 4.7 309:44 a.out 67: vn4 23598 daub 12 0 2124 2124 464 R 0 45.3 0.4 747:44 mcmuvt.v96 68: vn47 29550 roman 12 0 8956 8956 876 R 0 45.2 1.7 8976m Run_mpi2 69: vn13 26292 roman 14 0 8932 8932 872 R 0 45.1 1.7 7589m Run_mpi2 70: vn62 2857 suqin 13 0 24592 24M 580 R 0 45.0 4.7 293:57 a.out 71: vn14 30826 roman 12 0 8960 8960 876 R 0 45.0 1.7 7589m Run_mpi2 72: vn41 6066 roman 14 0 8476 8368 352 R 0 44.8 1.6 18557m Run_mpi4 73: vn46 29743 roman 10 0 3896 3896 984 R 0 44.7 0.7 8643m Run_mpi2 74: vn24 19399 roman 10 0 8940 8940 872 R 0 44.6 1.7 11283m Run_mpi4 75: vn24 19379 roman 10 0 8964 8964 876 R 0 44.6 1.7 11262m Run_mpi4 76: vn49 29469 roman 10 0 8936 8936 876 R 0 44.5 1.7 9007m Run_mpi2 77: vn42 7704 roman 12 0 8964 8964 876 R 0 44.5 1.7 18744m Run_mpi4 78: vn22 21421 roman 13 0 8964 8964 876 R 0 44.5 1.7 11170m Run_mpi4 79: vn12 28225 roman 10 0 8932 8932 872 R 0 44.4 1.7 7567m Run_mpi2 80: vn18 25788 suqin 19 0 24420 23M 584 R 0 44.3 4.7 304:18 a.out 81: vn2 30169 fransp 11 0 3188 3188 768 R 0 44.2 0.6 158:58 ads 82: vn18 25816 suqin 19 0 24464 23M 580 R 0 44.0 4.7 303:15 a.out 83: vn43 7645 roman 10 0 8964 8964 876 R 0 43.9 1.7 18832m Run_mpi4 84: vn21 22352 roman 14 0 8964 8964 876 R 0 43.9 1.7 11147m Run_mpi4 85: vn7 16015 daub 10 0 2264 2264 464 R 0 43.7 0.4 1226m mcmuvt.v96 86: vn3 23566 daub 12 0 2324 2324 464 R 0 43.7 0.4 1421m mcmuvt.v96 
87: vn23 21896 roman 14 0 8964 8964 876 R 0 43.7 1.7 11262m Run_mpi4 88: vn55 24263 fransp 10 0 3188 3188 768 R 0 43.5 0.6 174:45 ads 89: vn5 23746 daub 10 0 2208 2208 464 R 0 43.4 0.4 742:48 mcmuvt.v96 90: vn48 29489 roman 10 0 8956 8956 876 R 0 43.4 1.7 8997m Run_mpi2 91: vn34 26441 suqin 13 0 24428 23M 584 R 0 43.4 4.7 311:30 a.out 92: vn40 11216 roman 17 0 3532 3436 564 R 0 43.3 0.6 18128m Run_mpi4 93: vn60 24449 suqin 11 0 24508 23M 580 R 0 43.2 4.7 300:19 a.out 94: vn6 14008 daub 10 0 2388 2388 464 R 0 43.2 0.4 2194m mcmuvt.v96 95: vn41 6086 roman 11 0 8452 8340 348 R 0 43.2 1.6 18635m Run_mpi4 96: vn33 27972 suqin 15 0 24384 23M 580 R 0 43.2 4.7 312:16 a.out 97: vn30 23945 suqin 17 0 24376 23M 580 R 0 43.2 4.7 314:47 a.out 98: vn20 1649 roman 12 0 8940 8940 872 R 0 43.1 1.7 11299m Run_mpi4 99: vn50 25584 suqin 16 0 24608 24M 576 R 0 42.8 4.7 0:10 a.out 100: vn37 25381 fransp 10 0 3188 3188 768 R 0 42.8 0.6 160:22 ads 101: vn29 23855 suqin 10 0 24484 23M 580 R 0 42.8 4.7 303:14 a.out 102: vn32 17163 suqin 12 0 24396 23M 580 R 0 42.7 4.7 313:56 a.out 103: vn9 20280 daub 10 0 2292 2292 464 R 0 42.6 0.4 741:07 mcmuvt.v96 104: vn15 30205 fransp 10 0 3188 3188 768 R 0 42.6 0.6 392:07 ads 105: vn38 29022 suqin 11 0 24504 23M 580 R 0 42.5 4.7 301:47 a.out 106: vn28 31106 suqin 11 0 24604 24M 576 R 0 42.5 4.7 1:25 a.out 107: vn25 7717 fransp 13 0 3188 3188 768 R 0 42.4 0.6 401:55 ads 108: vn61 3293 suqin 10 0 24556 23M 580 R 0 42.2 4.7 295:31 a.out 109: vn20 1628 roman 10 0 3904 3904 972 R 0 41.5 0.7 10893m Run_mpi4 110: vn17 21958 suqin 9 0 24568 23M 576 R 0 38.4 4.7 4:04 a.out 111: vn34 26448 suqin 13 0 24420 23M 584 R 0 37.2 4.7 310:31 a.out 112: vn16 6370 matt 18 0 1200 1200 844 R 0 14.0 0.2 0:00 top 113: vn39 29170 matt 13 0 1144 1144 844 R 0 12.4 0.2 0:00 top 114: vn50 25600 matt 15 0 1132 1132 844 R 0 11.3 0.2 0:00 top 115: vn34 28863 matt 11 0 1120 1120 844 R 0 11.2 0.2 0:00 top 116: vn18 28155 matt 15 0 1132 1132 844 R 0 11.1 0.2 0:00 top 117: vn30 26422 
matt 14 0 1132 1132 844 R 0 11.0 0.2 0:00 top 118: vn32 19583 matt 12 0 1132 1132 844 R 0 10.9 0.2 0:00 top 119: vn51 27264 matt 7 0 1096 1096 844 R 0 9.9 0.2 0:00 top 120: vn40 18100 matt 13 0 1060 1060 844 R 0 9.9 0.2 0:00 top 121: vn1 15910 matt 7 0 1116 1116 844 R 0 9.9 0.2 0:00 top 122: vn26 2868 matt 13 0 1068 1068 844 R 0 9.7 0.2 0:00 top 123: vn33 30439 matt 12 0 1072 1072 844 R 0 9.6 0.2 0:00 top 124: vn11 3321 matt 9 0 1096 1096 844 R 0 9.6 0.2 0:00 top 125: vn38 31383 matt 11 0 1072 1072 844 R 0 9.3 0.2 0:00 top 126: vn24 23639 matt 7 0 1052 1052 844 R 0 9.3 0.2 0:00 top 127: vn25 10716 matt 12 0 1072 1072 844 R 0 9.2 0.2 0:00 top 128: vn60 26801 matt 10 0 1052 1052 844 R 0 9.0 0.2 0:00 top Sat Feb 12 14:34:57 PST 2000 In looking at memory on rebooted node, notice that vnfe1 is short total used free shared buffers cached Mem: 517260 513800 3460 77564 260672 118500 -/+ buffers/cache: 134628 382632 Swap: 650624 7680 642944 ... but substantial amount is in buffers/cache vnallCommand free > /tmp/FREE vnallCommand pstree > /tmp/PSTREE vnallCommand 'jj fpi' > /tmp/FPI vnallCommand 'jj cactus' > /tmp/CACTUS vnallbgCommand 'killall fpi' ############################################################ Sat Feb 12 17:44:59 PST 2000 ############################################################ (1) vnfe1 hung up, see README.CRASH (CRASH_45) ############################################################ Mon Feb 14 06:48:09 PST 2000 ############################################################ (1) Created idle account on vnfe1, run RTOP as idle ############################################################ Mon Feb 14 09:33:32 PST 2000 ############################################################ (1) Lothar reports MPI problem with vn21, rebooted see README.CRASH (CRASH_46) ############################################################ Mon Feb 14 12:33:01 PST 2000 ############################################################ (1) Lothar reports problems with vn55, pingable but cannot 
telnet, sshd see README.CRASH (CRASH_47) kdm?? ############################################################ Tue Feb 15 10:01:36 PST 2000 ############################################################ (1) Lothar reports problems with vn30 (as above), and may be related to old MPI processes (lothar, murashov) see README.CRASH (CRASH_48) ############################################################ Tue Feb 15 17:15:42 PST 2000 ############################################################ (1) Kendal reports MPI problems on vn16, vn23, vn24 README.CRASH (CRASH_49, CRASH_50, CRASH_51) ############################################################ Fri Feb 18 23:44:30 PST 2000 ############################################################ (1) New Account for Liam McWhirter nu cat<liam liam:x:1252:1200:James Liam McWhirter:/d/vnfe2/home/liam:/bin/csh END vnNewUsers liam # FFcC.ISYJl9EQ ############################################################ Sat Feb 19 02:25:43 PST 2000 ############################################################ # Upsized jnss internal arrays (need to do the same # with nss) # As matt@laplace Export jvs Distgz jvs # As matt@vnfe1 vnallbgCommand "cdi; ./Installz jvs" # Install, Installz are missing AGAIN! Would be nice to know why this is # happening vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install* /var/tmp/install; cd /var/tmp/install; ls' vnallbgCommand "cdi; ./Installz jvs" # Modified 'Distgz' on laplace to scp .tar.gz file to various remote hosts ############################################################ Wed Feb 23 12:47:11 PST 2000 ############################################################ (1) New Account for Arman Rahmim nu cat<rahmim rahmim:x:9007:9000:Arman Rahmim:/d/vnfe1/home/rahmim:/bin/tcsh END vnNewUsers rahmim # ?????? 
############################################################ Wed Feb 23 20:13:37 PST 2000 ############################################################ (1) Dave reports hung node vn23 README.CRASH (CRASH_52) NEXT_ACTION vnallCommand 'ps -elf | grep -i cactus | grep -v grep' > /tmp/CACTUS # Cleanup # Problems on vn3 vn4 vn11 vn13 vn14 vn15 vn16 . . . vnallCommand 'killall cactus_linux_mpi' vnallCommand 'ps -elf | grep -i cactus | grep -v grep' > /tmp/CACTUS >>> Executing as root@142.103.237.21 000 R dave 32082 1 0 69 0 - 16784 - Feb19 ? 00:58:55 cactus_linux f.par ssh root@vn21 killall cactus_linux vnallCommand 'ps -elf | grep -i cactus | grep -v grep' > /tmp/CACTUS # Clean-up complete (and apparently successful) ############################################################ Thu Feb 24 18:33:30 PST 2000 ############################################################ (1) Working with Dave's Cactus code /d/vnfe1/home/matt/debug/dave/ /d/vnfe1/home/matt/debug/dave/cactus /d/vnfe1/home/matt/debug/dave/cactus/exe . . . Level | it | | gxxn | ham | | | t | Max Min | Max Min | ------------------------------------------------------------------------------- | 0 | 0.000| 3.000000000 1.000000000 | 0.695538506 -0.573248827 | ACA 0.0000000000000000E+000 0.0000000000000000E+000 2 beta_avg 0.1541997909206136 0.1541997909206134 0.1541997909206140 g_xx norm 0.1027850739477869 rm_l_1_23317: p4_error: interrupt SIGINT: 2 p0_7806: p4_error: interrupt SIGSEGV: 11 bm_list_7807: p4_error: interrupt SIGINT: 2 rm_l_3_19204: p4_error: interrupt SIGINT: 2 rm_l_2_11450: p4_error: interrupt SIGINT: 2 29.220u 14.860s 0:47.39 93.0% 0+0k 0+0io 6523pf+0w # Running on vn1 vn26 vn27 vn28 # Error reproducible, ask for confirmation from Dave, looks like it # might be an array violation?? # Dave confirms ... 
############################################################ Fri Feb 25 10:14:49 PST 2000 ############################################################ (1) Fixing bug in svs.c, first re-installing RNPL # As matt@laplace Exp rnpl # As matt@vnfe1 vnallCommand ls -lt /usr/local/lib/libsvs.a > /tmp/LIBSVS # Good thing I checked, Installz missing on many machines (again!) vnallbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install* /var/tmp/install; cd /var/tmp/install; ls' vnbgCommand 'cd ~matt/system/vn/image/master/install; /bin/cp Install* /var/tmp/install; cd /var/tmp/install; ls' # Is there a problem copying the same file from too many processors? vnallbgCommand "cdi; ./Installz svs" vnallCommand ls -lt /usr/local/lib/libsvs.a > /tmp/LIBSVS ############################################################ Sat Feb 26 14:31:11 PST 2000 ############################################################ (1) rvs (rvs_cli_cf.h, rvs_cli.c) vnallbgCommand "cdi; ./Installz rvs" vnallCommand ls -lt /usr/local/lib/libvs.a > /tmp/LIBVS (2) rvsso (rvs_cli_cf.h, rvs_cli.c) vnallbgCommand "cdi; ./Installz rvsso" vnallCommand ls -lt /usr/local/lib/libvs.a > /tmp/LIBVS bhbgCommand 'cdi; ./Installz rvs' bhbgCommand 'cdi; ./Installz rvsso' (3) hvs (PROBLEMS) vnallbgCommand "cdi; ./Installz hvs" bhbgCommand "cdi; ./Installz hvs" (4) jvs vnallbgCommand "cdi; ./Installz jvs" bhbgCommand "cdi; ./Installz jvs" ############################################################ Sun Feb 27 10:54:11 PST 2000 ############################################################ (1) sdftosv apparently not working on vn machines, reinstall RNPL vnallbgCommand "setenv CFLAGS '-O6'; cdi; RM -rf rnpl; tar xfz ~matt/autoconf/rnpl.tar.gz; cd rnpl; configure --prefix=/usr/local; make full" # (Looks OK on bh[123456]) vnallCommand 'ls -lt `which sdftosv`' ############################################################ Tue Feb 29 12:09:43 PST 2000 ############################################################ CRASH_54 
rar0502 down 108+18:13 vnfe3 down 1:03 Pingable ... incommunicado ... hook up monitor # Dave was last on machine??? (Why, Dave?) ############################################################ Wed Mar 1 12:19:37 PST 2000 ############################################################ CRASH_55 rar0502 down 109+18:23 vn10 down 0:39 # Dave again? (Nice to have a goat) ############################################################ Thu Mar 2 07:36:18 PST 2000 ############################################################ CRASH_56, CRASH_57 (1) Matt hung the following nodes running too-huge MPI jobs (and ctrl-C-ing out etc.) vn13 vn51 # NEED TO GET A WATCHDOG RUNNING AGAIN (rsh or MPI based though) ############################################################ Thu Mar 2 15:48:54 PST 2000 ############################################################ (1) New Account for Trevor Stocki nu cat<stocki stocki:x:9008:9000:Trevor J. Stocki:/d/vnfe1/home/stocki:/bin/tcsh END vnNewUsers stocki etc # EwSUqkvvMnn42 vnDistEtc shadow ############################################################ Fri Mar 3 22:33:44 PST 2000 ############################################################ (1) vn10 down (Inaki) See CRASH_58 ############################################################ Tue Mar 7 06:50:30 PST 2000 ############################################################ (1) New Account for Brock Wilson nu cat<brock awilson:x:622:600:Brock Wilson:/d/vnfe1/home/awilson:/bin/tcsh END vnNewUsers brock etc # ZbEaFgI/H/sJM vnDistEtc shadow ############################################################ Tue Mar 7 13:01:14 PST 2000 ############################################################ (1) vn13 down (Dave 'mv'??) See CRASH_59 here are the last commands i executed: ( Cactus code runs and stops ) determinant = 0 in ginv. Sorry. So long... FORTRAN STOP [dave@vn13 exe]$ cd fs [dave@vn13 fs]$ ls [dave@vn13 fs]$ cd .. [dave@vn13 exe]$ ls [dave@vn13 exe]$ mv fs fs_nompi [dave@vn13 exe]$ ls [ vn13 dies here...] 
############################################################ Tue Mar 7 13:01:14 PST 2000 ############################################################ (1) Via 'mv'ing on vn13 managed to hang up node in pretty short order ... See CRASH.60 Try to hang up another node?? First install watch-dog?? (2) Modified 'vnHello' (added restart to try to restart inetd), NOTE SLEEP -> 5 !! # As root@vn13 crontab -e # DO NOT EDIT THIS FILE - edit the master and reinstall. # (/tmp/crontab.876 installed on Tue Mar 7 16:05:44 2000) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) #min hour daymo month daywk cmd 0,15,30,45 * * * * date >> /tmp/DATE 0,5,10,15,20,25,30,35,40,45,50,55 * * * * /d/vnfe1/home/matt/scripts/vnHello restart 3,18,33,48 * * * * /d/vnfe1/home/matt/scripts/vnHello reboot (3) With watchdog running on vn13, will try to hang again with 'mv' # As matt@vn13 cd debug/rnpl cp -r wave2d wave2d_crash cd wave2d_crash mv Archive Archive.O mv Archive.O Archive # Hung the machine ... now see whether it comes back from auto restart restart doesn't appear to have worked reboot at 16:33? Nope, 16:37, auto reboot doesn't appear to have worked (due to non-use of full path names to scripts!!) # Hard reboot # Any node?? Front-end?? # Try to hang machine again cd debug/rnpl RM -r wave2d_crash cp -r wave2d !$ cd !$ mv Archive Archive.O mv Archive.O Archive # kills it at 16:45 (maybe any command after 'mv' hangs it??) reboot at 16:48? Nope # Another hard reboot # Looks like auto reboot may not work with NFS accessed scripts? # As root@vn13 cd /root mkdir scripts cd scripts cp ~matt/scripts/vnHello ~matt/scripts/vnHelloReboot . # Made some changes to minimize NFS usage [root@vn13]# crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. 
# (/tmp/crontab.721 installed on Tue Mar 7 17:00:27 2000) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) #min hour daymo month daywk cmd 0,15,30,45 * * * * date >> /tmp/DATE 0,5,10,15,20,25,30,35,40,45,50,55 * * * * /root/scripts/vnHello restart 2,7,12,17,22,27,32,37,42,47,52,57 * * * * /root/scripts/vnHello reboot # Try to hang it again cd debug/rnpl RM -r wave2d_crash cp -r wave2d !$ cd !$ mv Archive Archive.O ls # kills it at 17:10 (apparently confirming that next command will hang it up) reboot at 17:12? Nope # Final hard reboot (maybe) ... might be an idea to 'mv' the 'mv' command, or automatically # alias it on front-ends?? # Not all 'mv's kill the node cd pwd /d/vnfe1/home/matt/ mv Archive Archive.O # seems to be OK [matt@vn13 rnpl_crash]$ pwd /d/vnfe1/home/matt/debug/rnpl_crash [matt@vn13 rnpl_crash]$ ls fwave3d/ wave2d/ wave2d_0/ wave3d1o/ [matt@vn13 rnpl_crash]$ mv wave2d wave2d.O [matt@vn13 rnpl_crash]$ [matt@vn13 rnpl_crash]$ ls # So is it the depth in the hierarchy here??? # Still didn't come back ?? # OK last hard-reboot, watchdog apparently ineffectual vnallCommand 'cd /bin; cp mv mv.real;' vnCommand 'which mv.new; mv.new' FUTURE_ACTION: Install 'mv.new' starting on vn13 ?? ssh root@vn13 'cd /bin/; cp ~matt/scripts/mv.new mv' ssh root@vn13 'ls -lt /bin/mv' FUTURE_ACTION: vnCommand 'cd /bin/; CP ~matt/scripts/mv.new mv' vnCommandMatt 'mv' ############################################################ *** SEE README.MV ############################################################ ############################################################ Fri Mar 10 18:54:37 PST 2000 ############################################################ (1) Scott H.
hung up vn64 with 'mv' ############################################################ Sat Mar 11 11:37:33 PST 2000 ############################################################ (1) From Bill re NFS From unruh@physics.ubc.ca Sat Mar 11 10:19:36 2000 have been having trouble with my nfs and locking of files. I have replaced the knfsd rpm package with the nfsd-util packages from mandrake 7.0 and it seems to have corrected the locking problem at least on the one server I installed it on. The files, compiled by me on string, a Mandrake 6.1 system, are the two nfs-util* rpm files in /usr/rpm on string. You have to uninstall knfsd first (probably unmounting all mounted file systems) rpm -e knfsd and then install these rpm -Uhv nfs-util* Do not know if it will solve the mv problem of course. Note the source is in /usr/local/src/nfs-util*src.rpm on string. You can rebuild the rpm with rpm --rebuild nfs-util*src.rpm which will put the two binary rpms into /usr/src/RPM/RPMS From unruh@physics.ubc.ca Sat Mar 11 11:17:43 2000 The bug report procedure to mandrake is submit@bugs.linux-mandrake.com At the head of the message put in two lines Package: name of package (eg knfsd) Version: version number of package Then describe the bug. www.mandrake.com/bugs ############################################################ Tue Mar 14 12:46:30 PST 2000 ############################################################ (1) Coded new .segdat utility segrtr in utilio, need to install on vn..., bh..., laplace, godel vnallbgCommand "cdi; Install utilio" vnallbgCommand "cdi; Install utilio" vnallCommand "cd /usr/local/lib; strings libutilio.a" > /tmp/utilio vi /tmp/utilio vnallCommand "ls /usr/local/bin/nss" ############################################################ Sat Mar 18 06:46:31 PST 2000 ############################################################ (1) Problem with clock on vn14 ?? # Set time with vnSetdate, ntpd is running but clock gets # off by seconds in a minute or so?
cd /etc killall ntpd mv ntp.drift ntp.drift.O vnSetdate ntptimeset [root@vn14]# ntptimeset Your clock is off by 0.8245555 seconds. (142.103.237.226) [15/15] [root@vn14]# !! ntptimeset Your clock is off by 3.6950360 seconds. (142.103.237.227) [15/15] (2) syslogd not running on vn14 (3) Drift on vn14 looks like 10% (!), for time being add cron job to reset every 5 minutes [root@vn14]# crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. # (/tmp/crontab.24384 installed on Sat Mar 18 07:03:11 2000) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) #min hour daymo month daywk cmd 0,5,10,15,20,25,30,35,40,45,50,55 * * * * /d/vnfe1/home/matt/scripts/vnSetdate ############################################################ Sat Mar 18 15:31:12 PST 2000 ############################################################ # Hung up vn13 with mpptest running on vn13/vn17 (after # interrupt), maybe should recompile with latest version of # driver? See README.CRASH CRASH_62 Sat Mar 18 16:19:33 PST 2000 # Try to reproduce crash # As matt@vnfe1 vnCommand -> vntestN -> vn17 vn21 vnTop vn17 vn21 vnMpptest vn17 vn21 # let run for about a minute, then CTRL-C # leaves mpptest running! vnCommand killall mpptest # And everything looks OK ... so really is a case of cleanup?? # Try doubling up vnMpptest vn17 vn21 ^-C vnMpptest vn17 vn21 x ^-C vnCommand killall mpptest # Again, everything looks OK ... should perhaps install watchdog? vnMpptest vn17 vn21 ^-C vnMpptest vn17 vn21 ^-C # Still OK, so who knows?
Guess I'll have to wait a bit for the # next hang Mpirun 16 wave id_12_500 ^-C at step 25 # Basic SMP performance test Mpirun 16 wave id_12 (50 iter) 59.880u 2.680s 1:06.46 94.1% 0+0k 0+0io 17596pf+0w Mpirun 16 wave id_12 (8,34) Mpirun 16 wave id_12 (36,46) # 34, 36, 38 50 etc running both 60.630u 2.610s 1:06.90 94.5% 0+0k 0+0io 17547pf+0w 60.220u 3.150s 1:07.31 94.1% 0+0k 0+0io 17577pf+0w # Everything looks fine ############################################################ Sat Mar 18 18:14:20 PST 2000 ############################################################ (1) Stress testing MPI/cards etc. with vnMpptest vnMpptest vn4 vn46 vnMpptest vn4 vn46 TO_DO ssh vn4 'killall mpptest' ssh vn46 'killall mpptest' vnMpptest vn48 vn50 vnMpptest vn48 vn50 TO_DO ssh vn48 'killall mpptest' ssh vn50 'killall mpptest' vnMpptest vn53 vn54 vnMpptest vn53 vn54 TO_DO ssh vn53 'killall mpptest' ssh vn54 'killall mpptest' vnMpptest vn53 vn56 TO_DO ssh vn53 'killall mpptest' ssh vn56 'killall mpptest' ############################################################ Sat Mar 18 18:54:58 PST 2000 ############################################################ Disk usage Filesystem 1k-blocks Used Available Use% Mounted on /dev/sda1 16445682 12462350 3127070 80% / /dev/sdb1 17066300 13771476 2406030 85% /home2 vnfe2:/home 16445682 1904757 13684663 12% /d/vnfe2/home vnfe2:/home2 17066300 14 16177492 0% /d/vnfe2/home2 vnfe3:/home 16445682 12684414 2905006 81% /d/vnfe3/home vnfe3:/home2 17066300 15 16177491 0% /d/vnfe3/home2 2175642 stocki 2018997 matt 1705332 luisl 1681411 ehonda 1631924 shawley 543395 inaki 444745 dave 282268 minghe 137514 fransp 127868 mijan 333584 daub 57382 murashov 35189 roman 5485468 suqin 4326164 atsci 884555 wkb 348725 xiao ############################################################ Sun Mar 19 06:11:31 PST 2000 ############################################################ (1) From Bill The LC_COLLATE type is set in /etc/profile.d/lang.sh or lang.csh On your it seems to be 
being set to LC_ALL=en, which seems to lead to the weird behaviour. You could try LC_ALL=C or LC_ALL=Posix which seem to produce the correct (ie ABCabc) behaviour. # Updated csh.cshrc on bh[12345] setenv LC_ALL Posix ############################################################ Sun Mar 19 06:39:14 PST 2000 ############################################################ (1) Downloading Staroffice Registering with Sun choptuik (id/passwd) http://www.sun.com/staroffice/get.cgi # Downloading 68 M (!) at 35 K /sec (!) # Simultaneously looking for dist. on bh1 tar xf so51a_lnx_01.tar cd /var/tmp/install/so51inst/office51 ./setup ############################################################ Sun Mar 19 09:19:57 PST 2000 ############################################################ (1) REBOOT CHECK set N=vn5 set N=vn7 set N=vn24 set N=vn19 set N=vn49 set N=vn42 set N=vn43 set N=vn45 set N=vn44 set N=vn41 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" Linux vn5.physics.ubc.ca 2.2.14-Psmp #1 SMP Fri Mar 17 23:30:57 PST 2000 i686 unknown Sun Mar 19 09:21:43 PST 2000 Sun Mar 19 09:21:44 PST 2000 Your clock is off by 0.2061420 seconds. (142.103.237.225) [15/15] 140 S root 444 1 0 60 0 - 325 skb_re 09:19 ? 00:00:00 rwhod 100 S root 631 629 0 70 0 - 509 rt_sig 09:20 ? 
00:00:00 tcsh -c hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | g vn5 up 0:01, 0 users, load 0.75, 0.19, 0.05 set N=vn58 set N=vn39 set N=vn20 set N=vn18 set N=vn10 set N=vn1 set N=vn47 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" # vn1 didn't come back Came back with keyboard/monitor Came back with monitor # vn47 didn't come back Came back with keyboard/monitor # vn18 didn't come back (no route to host) Came back with monitor ############################################################ Mon Mar 20 14:30:41 PST 2000 ############################################################ (1) Last set of reboots foreach n (vn3 vn12 vn14 vn15 vn16 vn22 vn37 vn63 vn64) echo "executing ssh root@${n} reboot" ssh root@${n} reboot sleep 10 end # Nothing coming back!! SOME RCP PROBLEM SHOULD HAVE RE-BOOTED ONE AT A TIME # Hard rebooted vn3 vn12 vn14 vn15 vn16 vn22 vn37 vn63 vn64 set N=vn3 set N=vn12 set N=vn14 set N=vn15 set N=vn16 set N=vn22 set N=vn37 set N=vn63 set N=vn64 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" vnallCommand 'uname -a' > /tmp/UNAME grep mdksmp /tmp/UNAME Linux vnfe2.physics.ubc.ca 2.2.13-7mdksmp #1 SMP Wed Sep 15 16:38:50 CEST 1999 i686 unknown # As root@vnfe2 # reboot # TAPE DEVICE NOT BEING RECOGNIZED BY SCSI SCAN AT BOOT-UP. CHECKED BIOS # PARAMETERS ETC. BUT NO GO. LOOKS LIKE IT'S HARDWARE. 
vnCommand 'umount -f vnfe2:/home vnfe2:/home2; mount -a; df; cd ~roman; ls' ssh root@vnfe1 'umount -f vnfe2:/home vnfe2:/home2; mount -a; df; cd ~roman; ls' ssh root@vnfe3 'umount -f vnfe2:/home vnfe2:/home2; mount -a; df; cd ~roman; ls' vnallCommand 'ls ~matt; ls ~roman; ls ~wkb' cd rnpl/wave2d Mpirun 50 wave id_13 88.170u 19.250s 2:19.65 76.9% 0+0k 0+0io 49911pf+0w (50 steps) ############################################################ Mon Mar 20 16:35:03 PST 2000 ############################################################ (1) Trying to diagnose clock on vn14 as per Bill's suggestion --systohc option to hwclock will set vnallCommand hwclock --show > /tmp/HWC0 vnallCommand hwclock --systohc > /tmp/HWC1 vnallCommand hwclock --show > /tmp/HWC2 (2) bhCommand 'hwclock --systohc' bhCommand 'hwclock --show' ############################################################ Mon Mar 20 22:12:13 PST 2000 ############################################################ (1) Ming He reports no PG compilers, license manager isn't getting restarted. Modify /etc/rc.d/rc.local ############################################################ Thu Mar 23 11:46:32 PST 2000 ############################################################ (1) vn20 down (Inaki?) vn19 accidentally powered down See README.CRASH (CRASH_63, CRASH_64) ############################################################ Thu Mar 23 20:04:56 PST 2000 ############################################################ (1) Note that Network cards are on different IRQs on different machines. (2) Stress testing vnMpptest vn2 vn26 vnMpptest vn2 vn26 1 vnMpptest vn33 vn34 vnMpptest vn33 vn34 1 vnMpptest vn35 vn38 vnMpptest vn35 vn38 1 vnMpptest vn39 vn43 vnMpptest vn39 vn43 1 vnMpptest vn58 vn59 vnMpptest vn58 vn59 1 vnMpptest vn60 vn61 vnMpptest vn60 vn61 1 vnTop 2 26 33 34 35 38 39 43 58 59 60 61 (3) Switch shows ports operating at about 80% max, also hardware picked up vn20 problem (a first!) 
Description: A high percentage of data errors was detected on port D4. Possible causes: The possible causes include faulty cabling or topology, half/full duplex mismatch, a misconfigured NIC, or a malfunctioning NIC, NIC driver, or transceiver. Actions: 1.If port D4 is 100Base-T, make sure the cable connectors punch-down blocks, and patch panels connecting to that port are Category 5 or better. Verify the correctness of the installation using a Category 5 test device. 2.Check the directly-connected device for mismatches in half/full duplex operation (half duplex on the switch and full duplex on the connected device, or the reverse). 3.Update the NIC driver software. 4.Verify that the network topology conforms to IEEE 802.3 standards. 5.Replace or relocate the cable. Also check the wiring closet, components, transceivers, and NICs for proper operation. (4) Cards/drivers don't seem to be performing as well as they had been, but perhaps that's not the case ... actually saw one close to 100% this AM vnTop 2 26 33 34 35 38 39 43 58 59 60 61 Fri Mar 24 06:39:07 PST 2000 (5) Ports with errors Errors Drops RX TX A1 0 647 A2 vnfe1 41 0 <- All alignment Rx B3 vn3 1 0 C3 vn11 0 564 D4 vn20 13 0 <- Switch flagged as excessive E3 vn27 0 409 E6 vn30 0 335 F6 vn38 0 2874 G5 vn45 0 2031 H3 vn51 0 1566 H4 vn52 0 3586 H5 vn53 0 1587 H6 vn54 0 496 H7 vn55 1 0 I8 vn64 1 0 Fri Mar 24 07:54:30 PST 2000 Same errors, going over to check cabling (1) Archiving Rtop # As matt@vnfe1 # implemented 'ts' to produce timestamp name cd vn ts 2000:03:24:0759 mv Rtop Rtop.2000:03:24:0759 mkdir Rtop mv Rtop.2000:03:24:0759 ../vnArchive tar cf Rtop.tar Rtop.2000:03:24:0759 gzip Rtop.tar # ftp to laplace:/usr2/people/matt/system/vnArchive ############################################################ Thu Mar 23 20:04:56 PST 2000 ############################################################ # These transfers get the port up to 97 Mbit or so ftp> put bf4.gz local: bf4.gz remote: bf4.gz 200 PORT command
successful. 150 Opening BINARY mode data connection for bf4.gz. 226 Transfer complete. 142571760 bytes sent in 13 secs (1.1e+04 Kbytes/sec) ftp> put bf4.gz local: bf4.gz remote: bf4.gz 200 PORT command successful. 150 Opening BINARY mode data connection for bf4.gz. 226 Transfer complete. 142571760 bytes sent in 12.8 secs (1.1e+04 Kbytes/sec) # 400 Mb file starts to slow things down substantially ftp> put B.gz local: B.gz remote: B.gz 200 PORT command successful. 150 Opening BINARY mode data connection for B.gz. 226 Transfer complete. 427715280 bytes sent in 104 secs (4e+03 Kbytes/sec) ############################################################ Fri Mar 24 09:05:29 PST 2000 ############################################################ (1) Coded 'vnUsage', testing run via cron on vnfe1 # As root@vnfe1 crontab -e crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. # (/tmp/crontab.24208 installed on Fri Mar 24 09:06:39 2000) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) # Format of lines: #min hour daymo month daywk cmd # Weekly backup of relativity machines 45 23 * * 1 /backups/scripts/weekly-backup # Level-0 backups (done once a month instead of weekly backup). #45 23 * * 1 /backups/scripts/level-0-backup # Reminder to do level-0's. 00 07 01 * * echo "Time to do the cluster's level-0s" | mail jason@physics.ubc.ca # Get node status (load factors) and export to laplace.physics.ubc.ca Web pages 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnStatus ; /d/vnfe1/home/matt/scripts/vnPigs 15 09 * * * /d/vnfe1/home/matt/scripts/vnUsage # Seemed to work OK, will have it run at 5:00AM # DO NOT EDIT THIS FILE - edit the master and reinstall. 
# (/tmp/crontab.26284 installed on Fri Mar 24 09:46:10 2000) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) # Format of lines: #min hour daymo month daywk cmd # Weekly backup of relativity machines 45 23 * * 1 /backups/scripts/weekly-backup # Level-0 backups (done once a month instead of weekly backup). #45 23 * * 1 /backups/scripts/level-0-backup # Reminder to do level-0's. 00 07 01 * * echo "Time to do the cluster's level-0s" | mail jason@physics.ubc.ca # Get node status (load factors) and export to laplace.physics.ubc.ca Web pages 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnStatus ; /d/vnfe1/home/matt/scripts/vnPigs 0 05 * * * /d/vnfe1/home/matt/scripts/vnUsage ############################################################ Fri Mar 24 10:12:56 PST 2000 ############################################################ (1) In machine room, switch reports same errors as above---checking cabling Nothing obviously wrong, and apparently everything running smoothly ############################################################ Fri Mar 24 16:10:49 PST 2000 ############################################################ (1) Switch error status ... same as previously ############################################################ Sat Mar 25 09:02:31 PST 2000 ############################################################ (1) Switch error status ... 
same as previously THINGS ARE LOOKING PRETTY STABLE Starting another couple of mpptest processes on vn19, vn20 FUTURE_ACTION vnTop 2 19 20 26 33 34 35 38 39 43 58 59 60 61 ############################################################ Sun Mar 26 06:07:03 PST 2000 ############################################################ (1) Cluster looking fine, stopping mpptest's after about 60 CPU-days cumulative error-free runtime ############################################################ Tue Mar 28 19:26:42 PST 2000 ############################################################ (1) Things obviously going too well, vn20 down again. pingable, but otherwise out. see README.CRASH (CRASH_65) Switch log definitely suggests possibility of bad hardware?? Nope, consistent with multiple resets of machine. Try replacing network card? ############################################################ Wed Mar 29 08:03:17 PST 2000 ############################################################ (1) Tons of Mar 27 09:16:06 vnfe1 kernel: find_fh_dentry: 08:01/3248287 dir/3248204 not found! messages in vnfe1's log starting at about Mar 27 09:12:33 vnfe1 sshd[29066]: log: Connection from 142.90.100.6 port 1267 Mar 27 09:12:35 vnfe1 sshd[29066]: log: Password authentication for stocki accepted. Mar 27 09:14:49 vnfe1 sshd[29080]: log: Connection from 142.103.234.22 port 1022 Mar 27 09:14:49 vnfe1 sshd[29080]: log: RSA authentication for inaki accepted. Mar 27 09:15:00 vnfe1 sshd[29096]: log: Connection from 142.103.237.14 port 1023 Mar 27 09:15:00 vnfe1 sshd[29096]: log: RSA authentication for root accepted. Mar 27 09:15:00 vnfe1 sshd[29096]: log: ROOT LOGIN as 'root' from vn14.physics.ubc.ca Mar 27 09:15:00 vnfe1 sshd[29098]: log: executing remote command as root: date +%m%d%H%M%Y.%S Mar 27 09:15:00 vnfe1 sshd[29096]: log: Closing connection to 142.103.237.14 Mar 27 09:15:58 vnfe1 kernel: find_fh_dentry: 08:01/3248288 dir/3248205 not found! 
Mar 27 09:15:58 vnfe1 kernel: find_fh_dentry: 08:01/3248288 dir/3248205 not found! Mar 27 09:16:06 vnfe1 kernel: find_fh_dentry: 08:01/3248287 dir/3248204 not found Nothing obvious in logs # Need to update NFS ?? cfind -i find_fh_dentry > /tmp/FIND <./fs/nfsd/nfsfh.c> 950:find_fh_dentry(struct knfs_fh *fh) 972: printk("find_fh_dentry: No SuperBlock for device .", 981: dprintk("find_fh_dentry: No inode found.\n"); 987: dprintk("find_fh_dentry: No inode found.\n"); 1000:printk("find_fh_dentry: Found a useless inode 0\n", inode->i_ino); 1009: printk("find_fh_dentry: Found / filehandle dirino = 0, 0\n", 1023: dprintk("find_fh_dentry: retry with 0\n", dirino); 1030: dprintk("find_fh_dentry: dirino not found 0\n", dirino); 1045:printk("find_fh_dentry: looked up /\n", 1053:printk("find_fh_dentry: / lookup mismatch!\n", 1081:printk("find_fh_dentry: /0 dir/0 not found!\n", 1146: dentry = find_fh_dentry(fh); ############################################################ /usr/src/linux/fs/nfsd/nfsfh.c ############################################################ /* * The is the basic lookup mechanism for turning an NFS file handle * into a dentry. There are several levels to the search: * (1) Look for the dentry pointer the short-term fhcache, * and verify that it has the correct inode number. * * (2) Try to validate the dentry pointer in the file handle, * and verify that it has the correct inode number. If this * fails, check for a cached lookup in the fix-up list and * repeat step (2) using the new dentry pointer. * * (3) Look up the dentry by using the inode and parent inode numbers * to build the name string. This should succeed for any Unix-like * filesystem. * * (4) Search for the parent dentry in the dir cache, and then * look for the name matching the inode number. * * (5) The most general case ... search the whole volume for the inode. * * If successful, we return a dentry with the use count incremented. 
* * Note: steps (4) and (5) above are probably unnecessary now that (3) * is working. Remove the code once this is verified ... */ static struct dentry * find_fh_dentry(struct knfs_fh *fh) { struct super_block *sb; struct dentry *dentry, *parent; struct inode * inode; struct list_head *lst; int looked_up = 0, retry = 0; ino_t dirino; /* * Stage 1: Look for the dentry in the short-term fhcache. */ dentry = find_dentry_in_fhcache(fh); if (dentry) { nfsdstats.fh_cached++; goto out; } /* * Stage 2: Attempt to find the inode. */ sb = get_super(fh->fh_dev); if (NULL == sb) { printk("find_fh_dentry: No SuperBlock for device %s.", kdevname(fh->fh_dev)); dentry = NULL; goto out; } dirino = u32_to_ino_t(fh->fh_dirino); inode = iget_in_use(sb, fh->fh_ino); if (!inode) { dprintk("find_fh_dentry: No inode found.\n"); goto out_five; } goto check; recheck: if (!inode) { dprintk("find_fh_dentry: No inode found.\n"); goto out_three; } check: for (lst = inode->i_dentry.next; lst != &inode->i_dentry; lst = lst->next) { dentry = list_entry(lst, struct dentry, d_alias); /* if we are looking up a directory then we don't need the parent! */ if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) { printk("find_fh_dentry: Found a useless inode %lu\n", inode->i_ino); continue; } if (dentry->d_parent->d_inode->i_ino != dirino) continue; dget(dentry); iput(inode); #ifdef NFSD_DEBUG_VERBOSE printk("find_fh_dentry: Found%s %s/%s filehandle dirino = %lu, %lu\n", retry ? " Renamed" : "", dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_parent->d_inode->i_ino, dirino); #endif goto out; } /* for inode->i_dentry */ /* * Before proceeding to a lookup, check for a rename */ if (!retry && (dirino = nfsd_cached_lookup(fh))) { dprintk("find_fh_dentry: retry with %lu\n", dirino); retry = 1; goto recheck; } iput(inode); dprintk("find_fh_dentry: dirino not found %lu\n", dirino); out_three: /* * Stage 3: Look up the dentry based on the inode and parent inode * numbers. 
This should work for all Unix-like filesystems. */ looked_up = 1; dentry = lookup_inode(u32_to_kdev_t(fh->fh_dev), u32_to_ino_t(fh->fh_dirino), u32_to_ino_t(fh->fh_ino)); if (!IS_ERR(dentry)) { struct inode * inode = dentry->d_inode; #ifdef NFSD_DEBUG_VERBOSE printk("find_fh_dentry: looked up %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); #endif if (inode && inode->i_ino == u32_to_ino_t(fh->fh_ino)) { nfsdstats.fh_lookup++; goto out; } #ifdef NFSD_PARANOIA printk("find_fh_dentry: %s/%s lookup mismatch!\n", dentry->d_parent->d_name.name, dentry->d_name.name); #endif dput(dentry); } /* * Stage 4: Look for the parent dentry in the fhcache ... */ parent = find_dentry_by_ino(u32_to_kdev_t(fh->fh_dev), u32_to_ino_t(fh->fh_dirino)); if (parent) { /* * ... then search for the inode in the parent directory. */ dget(parent); dentry = lookup_by_inode(parent, u32_to_ino_t(fh->fh_ino)); dput(parent); if (dentry) goto out; } out_five: /* * Stage 5: Search the whole volume, Yea Right. */ #ifdef NFSD_PARANOIA printk("find_fh_dentry: %s/%u dir/%u not found!\n", kdevname(u32_to_kdev_t(fh->fh_dev)), fh->fh_ino, fh->fh_dirino); #endif dentry = NULL; nfsdstats.fh_stale++; out: expire_all(); return dentry; } ############################################################ cd /usr/src/linux cfind -i NFSD_PARANOIA > /tmp/NFSD_PARANOIA ############################################################ <./fs/nfsd/export.c> 30:#define NFSD_PARANOIA 1 161:#ifdef NFSD_PARANOIA 202:#ifdef NFSD_PARANOIA <./fs/nfsd/vfs.c> 42:#define NFSD_PARANOIA <./fs/nfsd/nfsfh.c> 23:#define NFSD_PARANOIA 1 280:#ifdef NFSD_PARANOIA 655:#ifdef NFSD_PARANOIA 720:#ifdef NFSD_PARANOIA 727:#ifdef NFSD_PARANOIA 756:#ifdef NFSD_PARANOIA 772:#ifdef NFSD_PARANOIA 804:#ifdef NFSD_PARANOIA 850:#ifdef NFSD_PARANOIA 855:#ifdef NFSD_PARANOIA 863:#ifdef NFSD_PARANOIA 869:#ifdef NFSD_PARANOIA 1052:#ifdef NFSD_PARANOIA 1080:#ifdef NFSD_PARANOIA 1257:#ifdef NFSD_PARANOIA 
############################################################ Wed Mar 29 08:45:49 PST 2000 ############################################################ (1) Trying to figure out RAM-disk support initrd-2.2.13-7mdk.img initrd-2.2.13-7mdksmp.img Kernel configuration option (Block Devices) # As root@vnfe1 # 'Help' suggests that RAM disk support should NOT # be required, but who knows? ############################################################ Wed Mar 29 13:57:52 PST 2000 ############################################################ (1) Dave's problem with backgrounding 'mpirun' (cactus) jobs see README.DAVE cd debug/dave scp -r root@vnfe1.physics.ubc.ca:/d/vnfe1/home/dave/BC . # Apparently just need to redirect stdin from /dev/null ############################################################ Wed Mar 29 15:24:39 PST 2000 ############################################################ (1) Installing "local" software with PG compilers ~/scripts/soPG cd ~matt/system/vn/image/master/install # Coded /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG vnfeCommand 'cdi; mkdir -p PGI; cd PGI; cp /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' # MWC utils # RNPL # Still including /usr/local/lib ?? ############################################################ Wed Mar 29 15:46:01 PST 2000 ############################################################ (1) Swapping out vn20's Intel NIC for 3-COM Select 3c59x driver via linuxconf set N=vn20 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" vnMpptest vn20 vn4 vnMpptest vn20 vn4 1 (2) Something wrong with card/configuration, CHECKSUM error reported at boot-up,
switch detecting many errors, throughput a few Mb/s (3) Going back to Intel card, will try to check out 3COM on one of bh machines vnTop vn20 vn4 vnMpptest vn20 vn4 vnMpptest vn20 vn4 1 # Things look better # Error status from vnswitch Status and Counters - Port Counters - Port D4 Link Status : Up Bytes Rx : 3,574,886,332 Bytes Tx : 1,080,079,624 Unicast Rx : 289,355,420 Unicast Tx : 3,029,897,501 Bcast/Mcast Rx : 86,319 Bcast/Mcast Tx : 10,371,004 FCS Rx : 7664 Drops Tx : 0 Alignment Rx : 7899 Collisions Tx : 0 Runts Rx : 28,229 Late Colln Tx : 0 Giants Rx : 0 Excessive Colln : 0 Total Rx Errors : 43,793 Deferred Tx : 0 # Same status at Thu Mar 30 05:54:03 PST 2000 # Same status at Fri Mar 31 10:21:24 PST 2000 ############################################################ Fri Mar 31 08:55:56 PST 2000 ############################################################ (1) Reinstalling rvs, rvsso (now includes vn) # Ensure Install, Installz are in place vnii vnallbgCommand 'cdi; Installz rvsso' vnallbgCommand 'cdi; Installz rvs' TO_DO vnallCommand 'cdi; setenv VSHOST laplace; cd rvs; make test; ssh matt@laplace vska' vnallCommand 'cdi; setenv VSHOST laplace; cd rvsso; make test; ssh matt@laplace vska' ############################################################ Thu Apr 6 09:31:13 PDT 2000 ############################################################ (1) Account for Julio Navarro nu cat<julio jfn:x:9009:9000:Julio Navarro:/d/vnfe1/home/jfn:/bin/tcsh END vnNewUsers julio etc # IM.JCyg5UK//Y vnDistEtc shadow ############################################################ Thu Apr 6 10:43:07 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn ts 2000:04:06:1043 mv Rtop Rtop.2000:04:06:1043 mkdir Rtop mv Rtop.2000:04:06:1043 ../vnArchive cd !$ tar cf Rtop.tar Rtop.2000:04:06:1043 gzip Rtop.tar # ftp to laplace:/usr2/people/matt/system/vnArchive ############################################################ Thu Apr 6 12:32:52 PDT 2000 
############################################################ (1) Continuing installation of "local" software with PG compilers, but tinkering with aclocal.m4 to ensure that /usr/local/lib defaults can be disabled (see matt@laplace:~/autoconf/README) setenv BBH_CHECK_DEFAULTS NONE setenv LIB_PATHS /usr/local/PGI/lib setenv INCLUDE_PATHS /usr/local/PGI/include ~/scripts/soPG cd ~matt/system/vn/image/master/install # Modified /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' # MWC utils vnfeCommand 'cdi; cd PGI; Installz.PG rvs' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG rvsso' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG vutil' PROBS dveclib1.h missing #end if vnfeCommand 'cdi; cd PGI; Installz.PG utilio' pgf77 -fast -Msecond_underscore -L/usr/local/PGI/lib -L. -L/usr/local/PGI/lib -L/usr/local/PGI/lib segrtr.o -o segrtr -lutilio -lvutil /usr/local/PGI/lib/libvutil.a(dveclib.o): In function `dvvarr_': dveclib.o(.text+0x6197): undefined reference to `ftn_dabs__' /usr/local/PGI/lib/libvutil.a(dveclib.o): In function `dvpyth_': dveclib.o(.text+0x7950): undefined reference to `ftn_dsqrt__' /usr/local/PGI/lib/libvutil.a(dveclib.o): In function `dvchftx_': dveclib.o(.text+0x11079): undefined reference to `ftn_dcos__' /usr/local/PGI/lib/libvutil.a(dveclib.o): In function `axseg0_': dveclib.o(.text+0x11f6e): undefined reference to `ftn_dabs__' /usr/local/PGI/lib/libvutil.a(dveclib.o): In function `dvlofa_': dveclib.o(.text+0x13651): undefined reference to `ftn_dexp__' Linking: make: [segrtr] Error 1 (ignored) # So this appears to be a bug with PG compiler, loader intrinsic dabs real*8 a, b a = -1.0d0 call apply(a,b,dabs) write(0,*) a, b stop end subroutine apply(x,y,f) real*8 f external f real*8 x, y y = f(x) return end # works without -Msecond_underscore, but not with # One fix will be to generate symbols in dv.c if # -DLINUX_PG defined, updated 
~/scripts/soPG vnfeCommand 'cdi; cd PGI; Installz.PG utilio' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG utilmath' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG sv' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG jvs' OK?? vnfeCommand 'cdi; cd PGI; Installz.PG svs' OK?? # RNPL vnfeCommand 'cdi; cd PGI; Installz.PG rnpl' OK?? # Still including /usr/local/lib ?? # So far so good ... continue with linpack, lapack, fftpack ... vnfebgCommand 'cdi; cd PGI; Installz.PG netlib_linpack' OK?? vnfebgCommand 'cdi; cd PGI; Installz.PG netlib_odepack' OK?? vnfebgCommand 'cdi; cd PGI; Installz.PG netlib_fftpack' OK?? #vnfebgCommand 'cdi; cd PGI; test -d lapack && /bin/rm -rf lapack; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd netlib_lapack; source ~matt/scripts/soPG; make blaslib; make lapacklib; /bin/cp /usr/tmp/install/PGI/netlib_lapack/BLAS/SRC/libblas.a /usr/local/PGI/bin; /bin/cp /var/tmp/install/PGI/netlib_lapack/liblapack.a /usr/local/PGI/lib' OK?? # Should have been vnfebgCommand 'cdi; cd PGI; test -d lapack && /bin/rm -rf lapack; tar xfz /d/vnfe1/home/matt/autoconf/netlib_lapack.tar.gz; cd netlib_lapack; source ~matt/scripts/soPG; make blaslib; make lapacklib; /bin/cp /usr/tmp/install/PGI/netlib_lapack/BLAS/SRC/libblas.a /usr/local/PGI/lib; /bin/cp /var/tmp/install/PGI/netlib_lapack/liblapack.a /usr/local/PGI/lib' OK?? 
# GRACE vnfeCommand 'cdi; cd PGI; Installz.PG bbh_grace' "CommServer.h", line 98: error: identifier "ostream_withassign" is undefined static ostream_withassign olog ; ^ "CommServer.h", line 165: error: identifier "ostream_withassign" is undefined static ostream_withassign & log(void) { return olog ; } # ALSO NEED LINKS FROM /usr/local/PGI/build/LINUX/ch_p4/lib TO /usr/local/PGI/lib # AND PERHAPS /usr/local/PGI/include # Feeble attempts at patching 'ostream_withassign' deficiencies in # PG compilers failed, olog references wrapped with # DEBUG_PRINT, change top-level Makefile.in so DEBUG_PRINT is NOT # defined -DLINUX_PG -DWant_c_files -DIO_RNPLIO -DSTDC_HEADERS=1 -DHAVE_STRDUP=1 -DHAVE_ATEXIT=1 -DLINUX_PG -DF77_HAS_SYSTEM -DF77_HAS_CHDIR -DHAVE_LIBVS=1 -DHAVE_LIBSV=1 -DHAVE_LIBMPI=1 -L../../../lib -L/usr/tmp/install/PGI/bbh_grace/lib -L/usr/local/lib wave.o vsout.o -o wave -lgrace -lrnpl -lvsso -lsv -lm -lmpi Linking: ../../../lib/libgrace.a(CommRcvServerGhost.o): In function `log__12comm_serviceSFv': CommRcvServerGhost.o(.text+0x24): undefined reference to `comm_service::olog(void)' # Need /bin/rm *.ti in src clean target # Still have undefined reference in CommRcvServerGhost.o ############################################################ Tue Apr 11 01:07:54 PDT 2000 ############################################################ (1) Account for Art Poon nu cat<poon poon:x:9010:9000:Arthur Fu Yan Poon:/d/vnfe1/home/poon:/bin/tcsh END vnNewUsers poon etc # PFt9J6lmkWvo2 vnDistEtc shadow FUTURE_ACTION: Tell Art Poon about his account (whenever their domain comes back??) 
nslookup nimo.zoology.ubc.ca Server: warp.physics.ubc.ca Address: 142.103.236.1 *** warp.physics.ubc.ca can't find nimo.zoology.ubc.ca: Non-existent host/domain ############################################################ Tue Apr 11 15:02:02 PDT 2000 ############################################################ (1) Account for Corrie Kost nu cat<kost kost:x:9011:9000:Corrie Kost:/d/vnfe1/home/kost:/bin/tcsh END vnNewUsers kost etc # afinVflT2aySE vnDistEtc shadow ############################################################ Wed Apr 12 08:25:37 PDT 2000 ############################################################ (1) Trying to get "realistic" megafloppage rating for single processor. http://www.netlib.org/benchmark/1000d ############################################################ Sun Apr 16 05:58:04 PDT 2000 ############################################################ (1) vn20 down see README.CRASH (CRASH_66) set N=vn20 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" vn59 accidentally reset (power cable) set N=vn59 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" vnMpptest vn20 vn59 ############################################################ Tue Apr 18 08:29:04 PDT 2000 ############################################################ (1) In machine room with new network card for vn20 set N=vn20 ssh root@$N "hostname -s; uname -a; date; vnSetdate; ntptimeset; jj whod; ruptime | grep $N" vnMpptest vn20 vn1 ssh matt@vn20 killall mpptest ssh matt@vn20 killall mpptest ssh matt@vn1 killall mpptest vnMpptest vn13 vn14 ssh matt@vn13 killall mpptest ssh matt@vn13 killall mpptest ssh matt@vn14 killall mpptest ############################################################ Fri Apr 21 13:28:22 PDT 2000 ############################################################ # MAKING sensible links to PGI version of mpich vnfeCommand ln -s /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a 
/usr/local/PGI/lib/libmpi.a ############################################################ Fri Apr 21 13:36:54 PDT 2000 ############################################################ (1) bbh_grace with rnpl/wave2d example now compiles and runs on cluster (Modified CommServer.cpp and CommServer.h courtesy of Manish via Scott H) # Timing: PG vs GNU 200 steps level 12 cd debugPGI/rnpl/wave2d Mpirun 16 wave id_12 PG: 159.260u 11.980s 2:55.95 97.3% 0+0k 0+0io 17651pf+0w PG: 157.750u 14.980s 2:55.58 98.3% 0+0k 0+0io 17602pf+0w cd debug/rnpl/wave2d Mpirun 16 wave id_12_200 GNU: 226.140u 18.050s 4:10.93 97.3% 0+0k 0+0io 17582pf+0w ############################################################ Fri Apr 28 11:42:32 PDT 2000 ############################################################ (1) Problems with the PGI Compilers /usr/local/PGI/bin/mpif90 -fast -Msecond_underscore -c fpi.f90 pgf90-linux86: LICENSE MANAGER PROBLEM: The desired vendor daemon is down -97,380:111 (Connection refused) make: [fpi.o] Error 1 (ignored) /usr/local/PGI/bin/mpif90 -fast -Msecond_underscore fpi.o -o fpi Linking: /usr/bin/ld: cannot open fpi.o: No such file or directory make: [fpi] Error 1 (ignored) [root@vnfe1]# pwd /tmp/demoMPIPGI [root@vnfe1]# ls Archive/ Makefile Makefile.WWW PI21153 fpi.f90 mfile mfile.vn1 ############################################################ Wed May 3 11:40:26 PDT 2000 ############################################################ (1) Account for Vadim Astakhov nu cat<vadim vadim:x:9012:9000:Vadim Astakhov:/d/vnfe1/home/vadim:/bin/bash END vnNewUsers vadim etc # QMjfBX3Tagl9M vnDistEtc shadow ############################################################ Wed May 3 11:46:02 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn ts setenv TS "2000:05:03:1145" mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.tar Rtop.2000:05:03:1145 gzip Rtop.tar # ftp to laplace:/usr2/people/matt/system/vnArchive gunzip -c 
Rtop.tar.gz | tar xf - ############################################################ Wed May 3 14:53:43 PDT 2000 ############################################################ (1) Account for Jordana Tzenova nu cat<tzenova tzenova:x:9013:9000:Jordana Tzenova:/d/vnfe1/home/tzenova:/bin/tcsh END vnNewUsers tzenova etc # O4HX0/ACnVI/Q vnDistEtc shadow ############################################################ Thu May 11 12:00:26 PDT 2000 ############################################################ (1) Account for Zheqiong Wang nu cat<zheqiong zheqiong:x:1826:1800:Zheqiong Wang:/d/vnfe3/home/zheqiong:/bin/tcsh END vnNewUsers zheqiong etc # fCi7rTnQWAVcI vnDistEtc shadow ############################################################ Fri May 19 11:44:48 PDT 2000 ############################################################ vnallbgCommand 'cdi; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz .; Installz' vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' vnallbgCommand 'cdi; Installz rnpl' vnfeCommand 'cdi; cd PGI; Installz.PG rnpl' ############################################################ Fri May 19 11:58:29 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn ts setenv TS "2000:05:19:1158" mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.tar Rtop.2000:05:19:1158 gzip Rtop.tar # ftp to laplace:/usr2/people/matt/system/vnArchive gunzip -c Rtop.tar.gz | tar xf - ############################################################ Fri May 19 13:10:12 PDT 2000 ############################################################ (1) Account for John Dubinski nu cat<dubinski dubinski:x:653:600:John Dubinski:/d/vnfe1/home/dubinski:/bin/tcsh END vnNewUsers dubinski etc # 7GKzCev8I3ELw vnDistEtc shadow ############################################################ Wed May 24 16:03:53 PDT 2000 ############################################################ (1) 
Account for David Garfinkle nu cat<garfinkl garfinkl:x:654:600:David Garfinkle:/d/vnfe1/home/garfinkl:/bin/csh END vnNewUsers garfinkl etc # o2U/EC0NMv7Ro vnDistEtc shadow ############################################################ Wed May 31 15:45:44 PDT 2000 ############################################################ (1) Possible DDOS originating from within cluster reported by Mike ??? at UBC IT services, 822-1210 Ron P. points me to page re tool http://www.nipc.gov/advis00-044.htm vnallbgCommand 'cdi; cd find_ddos; find_ddos -y -g files -l LOG -p /tmp /' vnallCommand pstree > /tmp/PSTREE vnallCommand w > /tmp/W vnallCommand 'cat /usr/tmp/install/find_ddos/LOG' > /tmp/DDOS Looks clean (2) tcpdump reveals 22:03:42.990713 fl-teq1a-240-232.pbc.adelphia.net > 142.103.237.255: icmp: echo request (DF) 22:03:42.990805 vn1.physics.ubc.ca > fl-teq1a-240-232.pbc.adelphia.net: icmp: echo reply ... packet spoofing? packets apparently > 1024 bytes vnallbgCommand 'cat /proc/sys/net/ipv4/icmp_echo_ignore_all' vnallbgCommand 'echo 1 > /proc/sys/net/ipv4/icmp_echo_ignore_all' (3) Switch apparently shut down (or did IT services disconnect me from the outside world?) In machine room, everything up, except no link light on A1 port (external network) Turns out that Dennis O'Reilly turned off port seconds after I implemented the fix! Turned it back on, and all seems well ... no pings (4) ... well not quite Thu Jun 1 07:07:58 PDT 2000 ... every 20 seconds ... 
07:05:03.547412 dialup-209.245.139.229.SanJose1.Level3.net > 142.103.237.255: icmp: echo request 07:05:24.036361 dialup-209.245.139.229.SanJose1.Level3.net > 142.103.237.255: icmp: echo request ############################################################ SEE README.DDOS FOR ADDITIONAL INFORMATION ############################################################ ############################################################ Wed May 24 16:03:53 PDT 2000 ############################################################ (1) Account for Hanna R nu cat<hkr hkr:x:623:600:Hanna Ruotsalainen:/d/vnfe1/home/hkr:/bin/tcsh END vnNewUsers hkr etc # $1$jCuWZ367$gTcMGY3oev5.PFPHo74xZ. vnDistEtc shadow ############################################################ Thu Jun 1 19:12:51 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn tsminus setenv TS "2000-06-01-1913" mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # ftp to laplace:/usr2/people/matt/system/vnArchive # Renamed all archive names ':' -> '-' ############################################################ Fri Jun 2 10:13:15 PDT 2000 ############################################################ (1) Changing Hanna's encrypted passwd etc vs # $1$r/.0/IuW$G2x.EKamWOKk0Br4mjcMc0 vnDistEtc shadow # $1$r/.0/IuW$G2x.EKamWOKk0Br4mjcMc0 ############################################################ Fri Jun 2 12:04:07 PDT 2000 ############################################################ (1) Turning ON regular echo, turning OFF broadcast echo vnallbgCommand 'echo 1 > /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts' vnallbgCommand 'cat /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts' vnallbgCommand 'echo 0 > /proc/sys/net/ipv4/icmp_echo_ignore_all' vnallbgCommand 'cat /proc/sys/net/ipv4/icmp_echo_ignore_all' # Looks ok ping 142.103.237.255 only echoes from router and switch ############################################################ Mon Jun 5 07:16:59
PDT 2000 ############################################################ (1) Installing cliser # As matt@laplace cda cd cliser Rx # As matt@vnfe1 vnallbgCommand 'cdi; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz .; Installz' vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' vnallbgCommand 'cdi; Installz cliser' vnfeCommand 'cdi; cd PGI; Installz.PG cliser' ############################################################ Thu Jun 8 13:33:22 PDT 2000 ############################################################ (1) Installing bbh_xyz_stream # As matt@vnfe1 vnallbgCommand 'cdi; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz .; Installz' vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' vnallbgCommand 'cdi; Installz bbh_xyz_stream' vnfeCommand 'cdi; cd PGI; Installz.PG bbh_xyz_stream' ############################################################ Thu Jun 8 13:40:47 PDT 2000 ############################################################ (1) License manager down on vnfe1 /etc/rc.d/rc.local killall lmgrd killall lmgrd /usr/local/pgi/linux86/bin/lmgrd.rc start ############################################################ Thu Jun 8 13:44:03 PDT 2000 ############################################################ (1) bbh_xyz_stream needs extra make on vnfe[123] vnfeCommand 'cdi; cd PGI; cd bbh_xyz_stream; make' ############################################################ Thu Jun 8 16:18:16 PDT 2000 ############################################################ (1) Adding Forestry users () From: "Felix Yao" To: Dear Prof. Choptuik, As our supervisor, Dr.Ricardo Foschi, has discussed with you on May 18, = we would like to apply for a few accounts for the cluster. Attached please find four filled application forms. Regards, Felix Yao Yintang Wang Ricardo O.
Foschi rof Hong Li hli Yin-Tang Wang ytwang Felix Yao fyao cat>foschi_group<demo demo:x:699:600:Demonstration Account:/d/vnfe1/home2/demo:/bin/tcsh END vnNewUsers demo # Login as demo, set password etc # $1$nO.HEvOJ$5kQdq8tyeHHqMEFw1XByv/ vnDistEtc shadow Account for demo[123] cat<demo123 demo1:x:691:600:Demonstration Account:/d/vnfe1/home2/demo1:/bin/tcsh demo2:x:692:600:Demonstration Account:/d/vnfe1/home2/demo2:/bin/tcsh demo3:x:693:600:Demonstration Account:/d/vnfe1/home2/demo3:/bin/tcsh END vnNewUsers demo123 # $1$edbVRXXA$PCJUdV2Ie4LUxQJePofsL0 # $1$kw7AQGuf$1MazABEpdi9SP018Qx4zG/ # $1$PZOP3.9c$5SDTuZbCx3MZW4bPrRiFr1 ############################################################ Wed Jun 21 14:27:53 PDT 2000 ############################################################ (1) Re-Installing bbh_xyz_stream # As matt@vnfe1 vnallbgCommand 'cdi; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz .; Installz' vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG .; Installz.PG' vnallbgCommand 'cdi; Installz bbh_xyz_stream' vnfeCommand 'cdi; cd PGI; Installz.PG bbh_xyz_stream' ############################################################ Fri Jun 23 11:55:23 PDT 2000 ############################################################ (1) Adding /etc/hosts entries vis a vis demo 142.103.76.181 sgi1.longhouse.ubc.ca sgi1 142.103.76.182 sgi2.longhouse.ubc.ca sgi2 142.103.76.184 sgi3.longhouse.ubc.ca sgi3 # /etc/hosts # /etc/hosts.allow vnDistEtc hosts hosts.allow vnallbgCommand 'killall -HUP inetd' DNS1 137.82.1.1 DNS2 137.82.28.3 search domain physics.ubc.ca netmask? 
# As root@laplace # root@laplace:/etc/hosts.allow scp /etc/hosts.allow root@sgi1.physics.ubc.ca:/etc/hosts.allow scp /etc/hosts.allow root@sgi2.physics.ubc.ca:/etc/hosts.allow ############################################################ Wed May 24 16:03:53 PDT 2000 ############################################################ (1) Account for Michael Mitton nu cat<mmitton mmitton:x:9018:9000:Michael David Mitton:/d/vnfe1/home/mmitton:/bin/tcsh END vnNewUsers mmitton etc # $1$gRVTC1xQ$seyI9fKkLo3irxVAFfFc61 vnDistEtc shadow ############################################################ Tue Jun 27 12:18:38 PDT 2000 ############################################################ (1) Account for Mark Halpern nu cat<halpern halpern:x:9019:9000:Mark Halpern:/d/vnfe1/home/halpern:/bin/tcsh END vnNewUsers halpern etc # SIIpa1whqzwAc vnDistEtc shadow ############################################################ Tue Jul 4 12:22:35 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # ftp Rtop.2000-07-04-1222.tar.gz to # laplace:/usr2/people/matt/system/vnArchive ############################################################ Tue Jul 4 15:26:12 PDT 2000 ############################################################ (1) Testing Hanna's accounting script, vnAccount see README.VNACCOUNT ############################################################ Wed Jul 5 17:14:45 PDT 2000 ############################################################ (1) Account for Shuo Wang nu cat<shuo shuo:x:9020:9000:Shuo Wang:/d/vnfe3/home/shuo:/bin/bash END vnNewUsers shuo etc # 1wg9knvRMYL3c vnDistEtc shadow ############################################################ Thu Jul 6 06:57:53 PDT 2000 ############################################################ SEE README.CRASH: CRASH_68 Thu Jul 6 06:57:53 PDT 2000 # vnfe1 came back 
up with only one proc recognized again # rebooted, changed BIOS setting, rebooted [root@vnfe1]# date `ssh root@vnfe2 date +%m%d%H%M%Y.%S` Thu Jul 6 09:32:54 PDT 2000 [root@vnfe1]# grep proc /proc/cpuinfo processor : 0 processor : 1 ############################################################ Thu Jul 6 10:20:56 PDT 2000 ############################################################ (1) Running vnUsage manually on vnfe1:/home, will try 'niceing' du command nice +19 [matt@vnfe1 ~/scripts]$ more usage.batch #!/bin/sh USAGE_FILE="Usage" #DU_COMMAND="nice -n +19 /bin/du -ks" DU_COMMAND="nice -n +19 du -ks" # -h doesn't work well with sort #DU_COMMAND="du -hs" HEADER="<`pwd` usage: `date`>" case $1 in -) (echo $HEADER; $DU_COMMAND * | sort -rn ) ;; *) if touch $USAGE_FILE 2> /dev/null; then (echo $HEADER; $DU_COMMAND * | sort -rn) > $USAGE_FILE; cat $USAGE_FILE else (echo $HEADER; $DU_COMMAND * | sort -rn ) fi ;; esac # As root@vnfe1 cd /home usage.batch # Finished OK, trying vnUsage as root@vnfe1 vnUsage # Also finished OK ############################################################ Thu Jul 6 12:46:23 PDT 2000 ############################################################ (1) As long as Dave has us paranoid re the SGIs, try running cops on vnfe1 # As matt@vnfe1 cda ssh laplace 'cda; scp *cops*gz matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/autoconf' # As root@vnfe1 cdi tar zxf /d/vnfe1/home/matt/autoconf/cops104+.tar.gz; ############################################################ First pass ############################################################ cdi; cd cops_104+; ./reconfig; make; ./cops -v -s . -b cops.err !!ssh root@vnfe1.physics.ubc.ca cat /usr/tmp/install/cops_104+/vnfe1/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 12:49:27 PDT 2000 from host vnfe1.physics.ubc.ca, COPS v. Version 1.04+ **** root.chk **** **** dev.chk **** **** is_able.chk **** Warning! /etc/security is _World_ readable! 
ls -ltd /etc/security; chmod 700 /etc/security; ls -ltd /etc/security vnallbgCommand 'ls -ltd /etc/security; chmod 700 /etc/security; ls -ltd /etc/security' **** rc.chk **** **** cron.chk **** **** group.chk **** Warning! Group file, line 24, does not have 4 fields: cdwriters::64:: **** home.chk **** Warning! User lists's home directory /dev/null is not a directory! (mode 020666) /d/vnfe1/home/matt/system/vn/image/master/etc/passwd -> lists:x:500:500:BeroList:/: vnDistEtc passwd **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** ############################################################ Second pass ############################################################ cdi; cd cops_104+; ./cops -v -s . -b cops.err !!ssh root@vnfe1.physics.ubc.ca cat /usr/tmp/install/cops_104+/vnfe1/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 13:00:15 PDT 2000 from host vnfe1.physics.ubc.ca, COPS v. Version 1.04+ **** root.chk **** **** dev.chk **** **** is_able.chk **** **** rc.chk **** **** cron.chk **** **** group.chk **** Warning! Group file, line 24, does not have 4 fields: cdwriters::64:: # Fixed /d/vnfe1/home/matt/system/vn/image/master/etc/group vnDistEtc group **** home.chk **** **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** ############################################################ Third pass ############################################################ # As root@vnfe1 vnCops !!ssh root@vnfe1.physics.ubc.ca cat /usr/tmp/install/cops_104+/vnfe1/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 13:14:56 PDT 2000 from host vnfe1.physics.ubc.ca, COPS v. 
Version 1.04+ **** root.chk **** **** dev.chk **** **** is_able.chk **** **** rc.chk **** **** cron.chk **** **** group.chk **** **** home.chk **** **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** #----------------------------------------------------------- ssh vnfe2 vnCops !!ssh root@vnfe2.physics.ubc.ca cat /usr/tmp/install/cops_104+/vnfe2/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 13:17:31 PDT 2000 from host vnfe2.physics.ubc.ca, COPS v. Version 1.04+ **** root.chk **** **** dev.chk **** Warning! /dev/cdrom is _World_ readable! vnallbgCommand 'chmod og-xrw /dev/cdrom' vnallCommand 'ls -lt /dev/cdrom' **** is_able.chk **** **** rc.chk **** **** cron.chk **** **** group.chk **** **** home.chk **** **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** #----------------------------------------------------------- ssh vnfe3 vnCops !!ssh root@vnfe3.physics.ubc.ca cat /usr/tmp/install/cops_104+/vnfe3/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 13:20:26 PDT 2000 from host vnfe3.physics.ubc.ca, COPS v. Version 1.04+ **** root.chk **** **** dev.chk **** Warning! /dev/cdrom is _World_ readable! **** is_able.chk **** **** rc.chk **** **** cron.chk **** **** group.chk **** **** home.chk **** **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** #----------------------------------------------------------- ssh vn1 vnCops # IS TAKING A LONG TIME !!ssh root@vn1.physics.ubc.ca cat /usr/tmp/install/cops_104+/vn1/2000_Jul_6 ATTENTION: Security Report for Thu Jul 6 13:43:27 PDT 2000 from host vn1.physics.ubc.ca, COPS v. 
Version 1.04+ **** root.chk **** **** dev.chk **** **** is_able.chk **** **** rc.chk **** **** cron.chk **** **** group.chk **** **** home.chk **** **** passwd.chk **** **** user.chk **** **** misc.chk **** **** ftp.chk **** **** pass.chk **** **** kuang **** **** bug.chk **** vnallbgCommand vnCops TO_DO vnallCommand 'cat /usr/tmp/install/cops_104+/`hostname -s`/2000_Jul_6' > /tmp/COPS ssh matt@vnfe1.physics.ubc.ca cat /tmp/COPS > README.COPS # Looks OK ############################################################ Thu Jul 6 14:57:12 PDT 2000 ############################################################ (1) New guys from Forestry can't log-in hli ytwang fyao ############################################################ Fri Jul 7 05:02:37 PDT 2000 ############################################################ (1) vnfe1 close to hung again Culprit appears to be 'slocate' at job vnallbgCommand 'killall slocate; killall slocate; killall tmpwatch; killall tmpwatch' TO_DO vnCommand '/bin/rm /etc/cron.daily/slocate.cron' ############################################################ Tue Jul 11 11:57:51 PDT 2000 ############################################################ (1) What's wrong with this? Can't extract %cpu out of ps, How is cryptic option C (which may be key) specified? [matt@vn46 ~]$ !! | grep suqin; sleep 5; !! 
| grep suqin ps -eo user,pid,cputime,%cpu --sort cputime | more | grep suqin ; sleep 5 ; ps -eo user,pid,cputime,%cpu --sort cputime | more | grep suqin suqin 9301 00:00:00 0.0 suqin 9311 00:09:44 0.0 suqin 9312 00:09:42 0.0 suqin 9313 00:09:40 0.0 suqin 9314 00:09:38 0.0 suqin 9337 00:00:00 0.0 suqin 9301 00:00:00 0.0 suqin 9311 00:09:45 0.0 suqin 9312 00:09:43 0.0 suqin 9313 00:09:42 0.0 suqin 9314 00:09:39 0.0 suqin 9337 00:00:00 0.0 ############################################################ Tue Jul 11 14:24:47 PDT 2000 ############################################################ SEE README.CRASH: CRASH_69 # vnfe1 hung apparently due to SCSI error (tape drive) ############################################################ Thu Jul 13 09:02:46 PDT 2000 ############################################################ (1) Trying to benchmark and profile 'graxi' on vnfe1, after Frans points out that Alphas apparently run code about *7* times as fast (estimated for equal-clock basis). Easiest to modify root's Installz.PG script. # As matt@vnfe1 cd /d/vnfe1/home/matt/system/vn/image/master/install cp Installz.PG Installz.PG.inplace # Hack on Installz.PG.inplace mx Installz.PG.inplace vnfeCommand 'cdi; cd PGI; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.PG.inplace .; Installz.PG.inplace' vnfeCommand 'cdi; cd PGI; Installz.PG.inplace hlc_graxi.N' # Didn't find blas ... Probably didn't explicitly install it when # LAPACK was built vnfeCommand 'cd /usr/local/lib; ls -lt lib*blas*.a' >>> Executing as root@142.103.237.225 -rw-r--r-- 1 root root 547286 Nov 19 1999 libblas.a >>> Executing as root@142.103.237.226 -rw-r--r-- 1 root root 547286 Nov 19 1999 libblas.a >>> Executing as root@142.103.237.227 -rw-r--r-- 1 root root 547286 Nov 18 1999 libblas.a vnfeCommand 'cd /usr/local/PGI/lib; ls -lt lib*blas*.a' [matt@vnfe1 PGI]$ vnfeCommand 'cd /usr/local/PGI/lib; ls -lt lib*blas*.a' >>> Executing as root@142.103.237.225 ls: No match. 
>>> Executing as root@142.103.237.226 ls: No match. >>> Executing as root@142.103.237.227 ls: No match. vnfeCommand 'cd /usr/local/lib; ls -lt lib*pack*.a' >>> Executing as root@142.103.237.225 -rw-r--r-- 1 root root 263130 Feb 23 20:20 libfftpack.a -rw-r--r-- 1 root root 4109112 Nov 19 1999 liblapack.a -rw-r--r-- 1 root root 638950 Nov 16 1999 liblinpack.a -rw-r--r-- 1 root root 237786 Nov 16 1999 libodepack.a >>> Executing as root@142.103.237.226 -rw-r--r-- 1 root root 4109112 Nov 19 1999 liblapack.a -rw-r--r-- 1 root root 263130 Nov 19 1999 libfftpack.a -rw-r--r-- 1 root root 638950 Nov 17 1999 liblinpack.a -rw-r--r-- 1 root root 237786 Nov 17 1999 libodepack.a >>> Executing as root@142.103.237.227 -rw-r--r-- 1 root root 4109112 Nov 18 1999 liblapack.a -rw-r--r-- 1 root root 263130 Nov 18 1999 libfftpack.a -rw-r--r-- 1 root root 638950 Nov 16 1999 liblinpack.a -rw-r--r-- 1 root root 237786 Nov 16 1999 libodepack.a vnfeCommand 'cd /usr/local/PGI/lib; ls -lt lib*pack*.a' [matt@vnfe1 PGI]$ vnfeCommand 'cd /usr/local/PGI/lib; ls -lt lib*pack*.a' >>> Executing as root@142.103.237.225 -rw-r--r-- 1 root root 4873920 Apr 6 16:35 liblapack.a -rw-r--r-- 1 root root 509990 Apr 6 16:30 libfftpack.a -rw-r--r-- 1 root root 406386 Apr 6 16:29 libodepack.a -rw-r--r-- 1 root root 806224 Apr 6 16:29 liblinpack.a >>> Executing as root@142.103.237.226 -rw-r--r-- 1 root root 4873920 Apr 6 16:35 liblapack.a -rw-r--r-- 1 root root 509990 Apr 6 16:30 libfftpack.a -rw-r--r-- 1 root root 406386 Apr 6 16:29 libodepack.a -rw-r--r-- 1 root root 806224 Apr 6 16:29 liblinpack.a >>> Executing as root@142.103.237.227 -rw-r--r-- 1 root root 4873920 Apr 6 16:35 liblapack.a -rw-r--r-- 1 root root 509990 Apr 6 16:30 libfftpack.a -rw-r--r-- 1 root root 406386 Apr 6 16:29 libodepack.a -rw-r--r-- 1 root root 806224 Apr 6 16:29 liblinpack.a # Previous entries in this file make it apparent that PGI *pack* installation # was NOT rigorously checked, at line 6765 we see /bin/cp 
/usr/tmp/install/PGI/netlib_lapack/BLAS/SRC/libblas.a /usr/local/PGI/bin vnfeCommand 'ls /usr/local/PGI/bin/lib*.a' vnfeCommand 'mv /usr/local/PGI/bin/lib*.a /usr/local/PGI/lib' [root@vnfe1]# vnfeCommand 'cd /usr/local/PGI/lib; ls -lt lib*blas*.a' >>> Executing as root@142.103.237.225 -rw-r--r-- 1 root root 523670 Apr 6 16:35 libblas.a >>> Executing as root@142.103.237.226 -rw-r--r-- 1 root root 523670 Apr 6 16:35 libblas.a >>> Executing as root@142.103.237.227 -rw-r--r-- 1 root root 523670 Apr 6 16:35 libblas.a # Second try vnfeCommand 'cdi; cd PGI; Installz.PG.inplace hlc_graxi.N' vnfeCommand 'cdi; cd PGI/hlc_graxi.N; make test' OK # As matt@vnfe1 cd /d/vnfe1/home/matt/system/vn/image/master/install cp Installz Installz.inplace # Hack on Installz.inplace mx Installz.inplace vnfeCommand 'cdi; CP /d/vnfe1/home/matt/system/vn/image/master/install/Installz.inplace .; Installz.inplace' vnfeCommand 'cdi; Installz.inplace hlc_graxi.N' vnfeCommand 'cdi; cd hlc_graxi.N; make test' OK date; vnfeCommand 'cdi; cd hlc_graxi.N; make test'; date Thu Jul 13 09:26:52 PDT 2000 Thu Jul 13 09:27:51 PDT 2000 60 secs date; vnfeCommand 'cdi; cd PGI/hlc_graxi.N; make test'; date Thu Jul 13 09:26:52 PDT 2000 Thu Jul 13 09:27:23 PDT 2000 31 secs # Appears to be about twice as fast under PGI, so compilers may # be most of the story? 
# 12x24x128 PGI: 57.820u 0.020s 0:58.53 98.8% 0+0k 0+0io 248pf+0w GNU: 154.790u 0.120s 2:35.58 99.5% 0+0k 0+0io 212pf+0w # 24x48x32 PGI: 31.320u 0.060s 0:32.17 97.5% 0+0k 0+0io 290pf+0w GNU: 85.460u 0.250s 1:26.40 99.2% 0+0k 0+0io 254pf+0w # 48x96x32 PGI: 98.070u 0.090s 1:38.18 99.9% 0+0k 0+0io 407pf+0w GNU: 270.510u 0.160s 4:30.70 99.9% 0+0k 0+0io 371pf+0w # 96x192x16 (steps = 1, hence faster than expected) PGI: 86.110u 0.490s 1:28.60 97.7% 0+0k 0+0io 871pf+0w GNU: 237.040u 0.500s 3:59.88 99.0% 0+0k 0+0io 835pf+0w # 96x192x32 PGI: 168.160u 3.290s 2:57.72 96.4% 0+0k 0+0io 871pf+0w 2.5 GNU: 471.250u 2.280s 8:03.09 98.0% 0+0k 0+0io 835pf+0w 7.1 OSF1: 66.025u 0.263s 1:07.49 98.2% 0+108k 1+312io 0pf+0w 1.0 # 192x384x16 (Too big for interactive use on Alphas) ############################################################ Thu Jul 13 09:02:46 PDT 2000 ############################################################ TO_DO (1) MODIFY /usr/local/pgi/linux86/bin/lmgrd -c /usr/local/pgi/license.dat STARTUP SO THAT LICENSE MANAGER RUNS AS NON-PRIVILEGED USER cd /home/matt/system/vn/image/master/etc scp root@vnfe1.physics.ubc.ca:/etc/rc.d/rc.local rc.local.fe vnfeCommand 'cd /etc/rc.d; Arc rc.local; scp matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/rc.local.fe rc.local' vnfeCommand 'grep lmgr /etc/rc.d/rc.local' vnfeCommand 'chown adm.sys /usr/local/pgi/flexlm.log' # Killed and restarted daemons vnfeCommand '/bin/su adm -c "/usr/local/pgi/linux86/bin/lmgrd.rc start"' ############################################################ Sat Jul 15 06:54:58 PDT 2000 ############################################################ (1) Got series of messages 5 Fri Jul 14 21:00 PGI compilers . . .
22 Fri Jul 14 23:50 PGI compilers Re PGI compilers Need to modify vnTestPGI so that it restarts license demon as adm ############################################################ Mon Jul 17 16:26:02 PDT 2000 ############################################################ (1) Account for Eric Nodwell nu cat<nodwell nodwell:x:9021:9000:Eric Nodwell:/d/vnfe1/home/nodwell:/bin/bash END vnNewUsers nodwell etc # $1$cAq0sW25$097UsQv57keplSldV8KZv. vnDistEtc shadow ############################################################ Tue Jul 18 04:18:46 PDT 2000 ############################################################ (1) 'slocate's on front-ends still close to saturating machines mkdir /home/matt/system/vn/image/master/etc/cron.daily.fe cd !$ scp root@vnfe1:/etc/cron.daily/slocate.cron . vnfeCommand '/bin/rm /etc/cron.daily/slocate.cron' ############################################################ Thu Jul 20 14:51:40 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-07-20-1451.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-07-20-1451 ############################################################ Fri Jul 21 09:27:03 PDT 2000 ############################################################ (1) Account for Gerald Lim (SFU Phys grad student) nu cat<ghlim ghlim:x:9022:9000:Gerald Lim:/d/vnfe1/home/ghlim:/bin/bash END vnNewUsers ghlim etc; sola; vs FUTURE ACTION # DT3HOVlzXCcG6 vnDistEtc shadow ############################################################ Thu Jul 27 01:43:59 PDT 2000 ############################################################ vn42 down 1:17 see README.CRASH (CRASH_70) # No apparent reason for hang (stocki, fengxs(?) 
running) ############################################################ Thu Jul 27 10:31:23 PDT 2000 ############################################################ (1) Last Rtop archive was screwed up, didn't recreate Rtop. Fortunately am still mirroring ALL Rtop output at matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/vnshadow/Rtop # Last file in Rtop.2000-07-20-1451.tar.gz 2000:07:20:1443.52 # First file in Rtop 2000:07:27:0212.42 As matt@laplace cd vnArchive mkdir Rtop.2000-07-27-0212 # Hacked list (/bin/ls -1) in /tmp/RTOP pre cp < /tmp/RTOP | post /usr2/people/matt/system/vnArchive/Rtop.2000-07-27-0212 | csh cd vnArchive tz Rtop.2000-07-27-0212 As matt@vnfe1 cd vnArchive scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/vnArchive/Rtop.2000-07-27-0212.tar.gz . ############################################################ Tue Aug 8 06:55:02 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-08-08-0653.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-08-08-0653 ############################################################ Sat Aug 12 08:08:30 PDT 2000 ############################################################ vn11 down 9:07 vn2 down 8:44 see README.CRASH (CRASH_71, CRASH_72) ############################################################ Fri Aug 18 08:59:30 PDT 2000 ############################################################ (1) New Accounts for Matthew Case: SFU grad student, Igor Herbut supervisor Jordi Cohen: SFU grad student, Mike Plishke supervisor nu cat<mcase_jcohena mcase:x:9023:9000:Matthew Case:/d/vnfe1/home/mcase:/bin/ksh jcohena:x:9024:9000:Jordi Cohen:/d/vnfe1/home/jcohena:/bin/tcsh END vnNewUsers mcase_jcohena etc; sola; vs FUTURE ACTION mcase etc; sola; vs # 9xtqp1n5.r80A vnDistEtc shadow
jcohena etc; sola; vs # gfHMQrAN/7hRQ vnDistEtc shadow ############################################################ Mon Aug 21 12:02:38 PDT 2000 ############################################################ (1) New Account for Dan Vernon: SFU grad student, Mike Plishke supervisor nu cat<dvernon dvernon:x:9025:9000:Dan Vernon:/d/vnfe1/home/dvernon:/bin/tcsh END vnNewUsers dvernon etc; sola; vs nu cd Blurbs cp jcohena dvernon FUTURE ACTION etc # we0R0augR5ZDY vnDistEtc shadow ############################################################ Tue Aug 22 10:33:55 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-08-22-1034.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-08-22-1034 ############################################################ Tue Aug 22 10:43:08 PDT 2000 ############################################################ (1) Mounting /d/vnfe[123]/{home,home2} on NFS machines # As matt@vnfe1 etc scp root@vnfe1:/etc/exports . 
# Modified /home vn*.physics.ubc.ca(rw,no_root_squash) bh*.physics.ubc.ca(rw,no_root_squash) laplace.physics.ubc.ca(rw,no_root_squash) godel.physics.ubc.ca(rw,no_root_squash) sgi*.physics.ubc.ca(rw,no_root_squash) /home2 vn*.physics.ubc.ca(rw,no_root_squash) bh*.physics.ubc.ca(rw,no_root_squash) laplace.physics.ubc.ca(rw,no_root_squash) godel.physics.ubc.ca(rw,no_root_squash) sgi*.physics.ubc.ca(rw,no_root_squash) vnfeDistEtc exports vnfeCommand 'exportfs -av' ############################################################ Fri Aug 25 05:15:17 PDT 2000 ############################################################ (1) New Account for Gord McTaggart-Cowan: Bushe's MSc student nu cat<gmctc gmctc:x:1611:1600:Gord McTaggart-Cowan:/d/vnfe3/home/gmctc:/bin/bash END vnNewUsers gmctc etc; sola; vs nu cd Blurbs cp dvernon gmctc FUTURE ACTION etc # $1$pE6/nv.Q$rfPZoD7zeZQIQKT2QD.bl1 vnDistEtc shadow ############################################################ Wed Aug 30 05:56:58 PDT 2000 ############################################################ (1) Problem with vn55 64: vn55 up 163+23:05, 0 users, load 14.60, 14.51, 14.17 Can still ping, but can't ssh, telnet see README.CRASH CRASH_73 ############################################################ Fri Sep 1 02:07:12 PDT 2000 ############################################################ (1) New Account for Dave Michelson Adjunct Professor, Electrical and Computer Engineering nu cat<davem davem:x:9026:9000:Dave Michelson:/d/vnfe1/home/davem:/bin/bash END vnNewUsers davem etc; sola; vs nu cd Blurbs cp dvernon davem FUTURE ACTION etc # UjvYJBTgHcHtQ vnDistEtc shadow ############################################################ Fri Sep 1 15:28:18 PDT 2000 ############################################################ (1) New Account for Karn Kallio SFU Grad Student, Howard Trottier supervisor nu cat<karn karn:x:9027:9000:Karn Kallio:/d/vnfe1/home/karn:/bin/bash END vnNewUsers karn etc; sola; vs nu cd Blurbs cp dvernon karn etc # 
JZAURjyGJMXdU vnDistEtc shadow ############################################################ Wed Sep 6 06:25:19 PDT 2000 ############################################################ (1) New account for Joseph Emerson, SFU Grad Student, Leslie Ballentine supervisor nu cat<josephe josephe:x:9028:9000:Joseph V. Emerson:/d/vnfe1/home/josephe:/bin/bash END vnNewUsers josephe etc; sola; vs nu cd Blurbs cp dvernon josephe etc # DhMIObEHn8U.E vnDistEtc shadow ############################################################ Wed Sep 6 06:46:43 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-09-06-0646.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-09-06-0646 ############################################################ Thu Sep 7 18:31:10 PDT 2000 ############################################################ (1) rwhod on vn6 died, manually restarted, also killed and restarted klogd which appeared to have run amok. suqin running pnorandom.exe ############################################################ Sat Sep 9 13:32:59 PDT 2000 ############################################################ (1) rwhod problems again [matt@vnfe1 ~]$ down vn1 down 2:50 vn12 down 3:02 vn31 down 3:00 vn32 down 3:03 ... can ssh into all ... vn1 ... klogd run amok ... restarted klogd, rwhod vn12 ... restarted rwhod vn31 ... restarted rwhod vn32 ... restarted rwhod vn1 ... klogd run amok again ... restarted klogd, rwhod ############################################################ Tue Sep 12 12:04:02 PDT 2000 ############################################################ (1) rwhod problems with vnfe3 # syslogd out of control ?
kill -9 314 (2) Tried re-creating /var/spool/rwho Log contains many instances of 'Socket operation on non-socket' starting with Sep 12 10:27:26 vnfe3 rwhod[451]: sendto(142.103.237.255): Socket operation on non-socket Try to get things going with linuxconf ... didn't work (3) daub 20 suqin 13 mcase 11 jcohena 11 tzenova 5 stocki 5 fransp 5 minghe 4 zheqiong 2 ytwang 2 mmitton 2 josephe 2 fengxs 2 bian 2 roman 1 foreach u (daub suqin mcase jcohena tzenova stocki fransp minghe zheqiong ytwang mmitton josephe fengxs bian roman) cd ~$u pwd | grep -v vnfe3 /home/suqin /home/zheqiong /home/ytwang /home/fengxs /home/bian end ############################################################ Thu Sep 14 09:32:04 PDT 2000 ############################################################ Hi. I'm afraid I've forgotten my password for my account, dvernon, on the vn cluster. Could you please set my encrypted password to NelQxNQOR.Svg I apologize for my forgetfulness; I'll try not to do this again. Dan Vernon etc vs vnDistEtc shadow ############################################################ Sun Sep 17 14:33:14 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-09-17-1433.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-09-17-1433 ############################################################ Tue Sep 19 13:32:03 PDT 2000 ############################################################ (1) Generating Usage statistics cd system/vnshadow/Rtop [matt@vnfe1 Rtop]$ mkdir D1999 [matt@vnfe1 Rtop]$ LS | grep -v '^2000' | pre mv | post D1999 | csh [matt@vnfe1 vnshadow]$ ls Rtop.1999/ Rtop.2000/ cd Rtop.2000 LS > /tmp/LS 2000:01:01:0000.47 2000:01:01:0008.33 . . . 2000:09:19:1003.52 2000:09:19:1013.25 44298 files vnAccount (makes ..in place.. ! with root data.html) # RTFM !
pwd mkdir ../vnAccount LS | grep -v '^2000' | pre mv | post '../vnAccount' | csh # As matt@laplace cd /Public/Members/matt/Doc/VN mv vnAccount.O vnAccount.OO mv vnAccount vnAccount.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount . # Bars are 100%, may not be using the correct version (master on bh?) cds cp vnAccount vnAccount.2000.09.19 scp matt@bh1.physics.ubc.ca:/d/bh6/home/hkr/bin/vnAccount . # Even on SGI, vnAccount -help now seems reasonable # As matt@vnfe1 cds scp matt@bh1.physics.ubc.ca:/d/bh6/home/hkr/bin/vnAccount . cd system/vnshadow/Rtop.2000 vnAccount # did *NOT* create vnAccount directory for html files LS | grep -v '^2000' | pre mv | post '../vnAccount' | csh ############################################################ Tue Sep 19 19:14:23 PDT 2000 ############################################################ (1) Generating statistics for 2000.09, 2000.08 cd vnshadow mkdir Rtop.2000.09 cd Rtop.2000 LS | grep '^2000:09' | pre cp | post ../Rtop.2000.09 | csh mkdir ../vnAccount.09 LS | grep -v '^2000' | pre mv | post '../vnAccount.09' | csh cd vnshadow mkdir Rtop.2000.08 cd Rtop.2000 LS | grep '^2000:08' | pre cp | post ../Rtop.2000.08 | csh mkdir ../vnAccount.08 LS | grep -v '^2000' | pre mv | post '../vnAccount.08' | csh # As matt@laplace cd /Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.09 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.08 . # Usage up to about 75% thus far in September ############################################################ Tue Sep 19 19:55:12 PDT 2000 ############################################################ (1) New account for phys410 Physics 410 Account, nu cat<phys410 phys410:x:20000:9000:Physics 410 Account:/d/vnfe1/home/phys410:/bin/tcsh END vnNewUsers phys410 etc; sola; vs # rxmtOB24P4Kb2 vnDistEtc shadow # Create phys410 group # As matt@vnfe1 etc Arc group . . . 
ascher::2000: other::9000: phys410::20000: ssh root@vnfe1 'cd ~phys410; cd ..; chown phys410.phys410 phys410; ls -ltd phys410' # As phys410@vnfe1 ssh-keygen 1024 35 126786164258634350333838363402323184065791825669446143052647178146778519662104266354600392707445107331366150538939073591229453880796372977463713709801483409510319660236845327919159638206188900566085560607896385785836564466144591073107961314171478590509720989371944003811023100544822580866802701918543ys410@vnfe1.physics.ubc.ca ############################################################ Fri Sep 22 17:06:01 PDT 2000 ############################################################ (1) vn8 apparently rebooted about 2 hours ago ############################################################ Wed Sep 27 18:07:46 PDT 2000 ############################################################ # Coding vnDupUser # As matt@laplace # Updated UNIX_SETUP/Dist to mirror .ssh, backing up # {laplace,godel,sgi1,sgi2} prior to export of # matt@laplace:~/.ssh/authorized_keys to root@ said # hosts # Basically finished vnDupUser hack created 'jaggarwa' account via vnDupUser jaggarwa 20001 20000 laplace # Removing and writing second level script to automate Accounts jaggarwa:x:1128:410:Januk Swarup Aggarwal:/d/sgi1/usr/people/jaggarwa:/bin/tcsh cherring:x:1129:410:Wade Cherrington:/d/sgi1/usr/people/cherring:/bin/tcsh rcoope:x:1130:410:Robin Coope:/d/sgi1/usr/people/rcoope:/bin/tcsh zacfong:x:1131:410:Zachary Fong:/d/sgi1/usr/people/zacfong:/bin/tcsh weiyang:x:1132:410:Weiyang Jiang:/d/sgi1/usr/people/weiyang:/bin/tcsh ckaiser:x:1133:410:Christina Kaiser:/d/sgi1/usr/people/ckaiser:/bin/tcsh kimlam:x:1134:410:Kim Lam:/d/sgi1/usr/people/kimlam:/bin/tcsh bhlee:x:1135:410:Brian Ching-Hsin Lee:/d/sgi1/usr/people/bhlee:/bin/tcsh jnakane:x:1136:410:Jonathan Nakane:/d/sgi1/usr/people/jnakane:/bin/tcsh mrege:x:1137:410:Meenal Rege:/d/sgi1/usr/people/mrege:/bin/tcsh ctong:x:1138:410:Christopher Tong:/d/sgi1/usr/people/ctong:/bin/tcsh 
mwarren:x:1139:410:Mya Rose Warren:/d/sgi1/usr/people/mwarren:/bin/tcsh mzimonja:x:1140:410:Marija Zimonja:/d/sgi1/usr/people/mzimonja:/bin/tcsh kbrought:x:1141:410:Kesten Broughton:/d/sgi1/usr/people/kbrought:/bin/tcsh soundara:x:1142:410:Govindarajan Soundararajan:/d/sgi1/usr/people/soundara:/bin/tcsh lauren:x:1143:410:Lauren Anne MacArthur:/d/sgi1/usr/people/lauren:/bin/tcsh xinghua:x:1144:410:Xinghua Li:/d/sgi1/usr/people/xinghua:/bin/tcsh bpark:x:1145:410:Brian Park:/d/sgi1/usr/people/bpark:/bin/tcsh ericms:x:1146:410:Eric Strohm:/d/sgi1/usr/people/ericms:/bin/tcsh sjwu:x:1147:410:Jason Shao-Chun Wu:/d/sgi1/usr/people/sjwu:/bin/tcsh aballest:x:1148:410:Anders Ballestad:/d/sgi1/usr/people/aballest:/bin/tcsh astevens:x:1149:410:Adrian Stevens:/d/sgi1/usr/people/astevens:/bin/tcsh mghafari:x:1150:410:Mohamad Reza Ghafari Banaee:/d/sgi1/usr/people/mghafari:/bin/tcsh kkondra:x:1151:410:Kevin Kondra:/d/sgi1/usr/people/kkondra:/bin/tcsh elespt:x:1152:410:Philip Eles:/d/sgi1/usr/people/elespt:/bin/tcsh jaggarwa cherring rcoope zacfong weiyang ckaiser kimlam bhlee jnakane mrege ctong mwarren mzimonja kbrought soundara lauren xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt !!ssh matt@vnfe1.physics.ubc.ca cat /d/vnfe1/home/matt/scripts/vn410 #!/bin/sh -x X=false NUID=20000 for u in jaggarwa cherring rcoope zacfong weiyang ckaiser kimlam bhlee jnakane mrege ctong mwarren mzimonja kbrought soundara lauren xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt; do CMD="vnDupUser $u $NUID 20000 laplace" echo "Would execute '$CMD'" $X && $CMD NUID=`expr $NUID + 1` done # And probably crashed a ton of nodes # Kill cherring, rcoope, zacfong, weiyang # Put in a sleep 10, seems to do the trick, nope ... 
try sleep 30 #!/bin/sh -x X=true #NUID=20000 #for u in jaggarwa cherring rcoope zacfong weiyang ckaiser kimlam bhlee jnakane mrege ctong mwarren mzimonja kbrought soundara lauren xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt; do NUID=20016 for u in xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt; do CMD="vnDupUser $u $NUID 20000 laplace" echo "Would execute '$CMD'" $X && $CMD NUID=`expr $NUID + 1` sleep 30 done # Collided 'jaggarwa' and 'phys410' (pretty pathetic script!) #!/bin/sh -x X=true #NUID=20000 #for u in jaggarwa cherring rcoope zacfong weiyang ckaiser kimlam bhlee jnakane mrege ctong mwarren mzimonja kbrought soundara lauren xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt; do #NUID=20016 #for u in xinghua bpark ericms sjwu aballest astevens mghafari kkondra elespt; do NUID=20025 for u in jaggarwa; do CMD="vnDupUser $u $NUID 20000 laplace" echo "Would execute '$CMD'" $X && $CMD NUID=`expr $NUID + 1` sleep 30 done !!ssh matt@vnfe1 'cd ..; ls -lt | grep phys410' drwxr-xr-x 2 jaggarwa phys410 1024 Sep 27 19:39 jaggarwa/ drwxr-xr-x 2 elespt phys410 1024 Sep 27 19:34 elespt/ drwxr-xr-x 2 kkondra phys410 1024 Sep 27 19:33 kkondra/ drwxr-xr-x 2 mghafari phys410 1024 Sep 27 19:33 mghafari/ drwxr-xr-x 2 astevens phys410 1024 Sep 27 19:32 astevens/ drwxr-xr-x 2 aballest phys410 1024 Sep 27 19:31 aballest/ drwxr-xr-x 2 sjwu phys410 1024 Sep 27 19:31 sjwu/ drwxr-xr-x 2 ericms phys410 1024 Sep 27 19:30 ericms/ drwxr-xr-x 2 bpark phys410 1024 Sep 27 19:30 bpark/ drwxr-xr-x 2 xinghua phys410 1024 Sep 27 19:29 xinghua/ drwxr-xr-x 2 lauren phys410 1024 Sep 27 19:27 lauren/ drwxr-xr-x 2 soundara phys410 1024 Sep 27 19:27 soundara/ drwxr-xr-x 2 kbrought phys410 1024 Sep 27 19:26 kbrought/ drwxr-xr-x 2 mzimonja phys410 1024 Sep 27 19:26 mzimonja/ drwxr-xr-x 2 mwarren phys410 1024 Sep 27 19:26 mwarren/ drwxr-xr-x 2 ctong phys410 1024 Sep 27 19:25 ctong/ drwxr-xr-x 2 mrege phys410 1024 Sep 27 19:25 mrege/ drwxr-xr-x 
2 jnakane phys410 1024 Sep 27 19:24 jnakane/ drwxr-xr-x 2 bhlee phys410 1024 Sep 27 19:24 bhlee/ drwxr-xr-x 2 kimlam phys410 1024 Sep 27 19:24 kimlam/ drwxr-xr-x 2 ckaiser phys410 1024 Sep 27 19:24 ckaiser/ drwxr-xr-x 2 weiyang phys410 1024 Sep 27 19:23 weiyang/ drwxr-xr-x 2 zacfong phys410 1024 Sep 27 19:23 zacfong/ drwxr-xr-x 2 rcoope phys410 1024 Sep 27 19:22 rcoope/ drwxr-xr-x 2 cherring phys410 1024 Sep 27 19:22 cherring/ drwxr-xr-x 5 phys410 phys410 1024 Sep 20 20:59 phys410/ ############################################################ Sun Oct 1 06:23:25 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-10-01-0623.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-10-01-0623 LS | grep '^2000:09' | pre CP | post ../Rtop.2000.09 | csh cd ../Rtop.2000.09 vnAccount LS | grep -v '^2000' | pre MV | post '../vnAccount.09' | csh scp -r vnAccount.09 matt@laplace.physics.ubc.ca:/Public/Members/matt/Doc/VN cd /home/matt/system/vnshadow/Rtop.2000 vnAccount LS | grep -v '^2000' | pre MV | post '../vnAccount' | csh ############################################################ Tue Oct 3 09:04:19 PDT 2000 ############################################################ (1) Installing 'ecell' on 'vnfe1' as per Kesten Broughton Physics 410 request # As root@vnfe1 cdi mkdir RPMS cd RPMS scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/UNIX_SETUP/linux_other_src/ecell-1.0-2.i386.rpm . 
rpm -ql -p ecell-1.0-2.i386.rpm rpm --install ecell-1.0-2.i386.rpm ############################################################ Tue Oct 3 14:36:04 PDT 2000 ############################################################ (1) Created 410 accounts using 'laplace' passwords, but hadn't updated /etc/shadow on 'laplace' from 'sgi1' (Thanks Kesten) rrvi /etc/shadow Rationalized laplace:/etc/shadow, then hand updated cluster /etc/shadow and redistributed # Check cd vn mkdir 410 cd 410 scp root@sgi1:/etc/shadow sgi1.shadow scp root@vnfe1:/etc/shadow vnfe1.shadow sed 's/:/ /g' < sgi1.shadow | nth 1 2 > sgi1 # Hack sgi1 sed 's/:/ /g' < vnfe1.shadow | nth 1 2 > vnfe1 # Hack vnfe1 diff sgi1 vnfe1 # OK ############################################################ Wed Oct 4 08:26:45 PDT 2000 ############################################################ (1) New account for Toby Johnson, Postdoc Zoology (Otto) Lists Edinburgh number Wants PAML installed http://abacus.gene.ucl.ac.uk/software/paml.html nu cat<johnson johnson:x:9029:9000:Toby Johnson:/d/vnfe1/home/johnson:/bin/bash END vnNewUsers johnson etc; sola; vs nu cd Blurbs cp dvernon johnson etc # vCMYQNHrRwL3c vnDistEtc shadow # Missed the trailing 'c' first time through! # Set .forward ssh root@vnfe1 'cd ~johnson; echo "johnson@zoology.ubc.ca" > .forward; chown johnson.other .forward; ls -al; cat .forward' ############################################################ Wed Oct 4 08:41:42 PDT 2000 ############################################################ PAML as per Toby Johnson's request http://abacus.gene.ucl.ac.uk/software/paml.html # Netscape downloaded matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/paml3.0a.tar.Z # Tar'ed up in bad format, doesn't untar to a directory # Re-packaged as paml3.0a.tar.gz scp matt@laplace.physics.ubc.ca:/tmp/paml3.0a.tar.gz .
cda gunzip -c paml3.0a.tar.gz | tar xf - # Simple Makefile # As matt@vnfe1 cda Installz paml3.0a # ssh root@vnfe1 'cdi; setenv CFLAGS "-O6"; setenv PREFIX /usr/local; tar zxf ~matt/autoconf/paml3.0a.tar.gz; cd paml3.0a; make install' # Installs following executables baseml codeml codemlsites basemlg pamp evolver mcmctree yn00 chi2 foreach p (baseml codeml codemlsites basemlg pamp evolver mcmctree yn00 chi2) $p end # As root@vnfe1 # Commands not found ssh root@vnfe1 'cdi; setenv CFLAGS "-O6"; setenv PREFIX /usr/local; tar zxf ~matt/autoconf/paml3.0a.tar.gz; cd paml3.0a; make install' ssh root@vnfe1 'cd /usr/local/bin; /bin/rm -f baseml codeml codemlsites basemlg pamp evolver mcmctree yn00 chi2' ssh root@vnfe1 'cdi; RM -r paml3.0a' ssh root@vnfe1 'cdi; setenv CFLAGS "-O6"; setenv PREFIX /usr/local/paml; tar zxf ~matt/autoconf/paml3.0a.tar.gz; cd paml3.0a; make install' ssh root@vnfe1 'cdi; RM -r paml3.0a' vnallbgCommand 'cdi; setenv CFLAGS "-O6"; setenv PREFIX /usr/local/paml; tar zxf ~matt/autoconf/paml3.0a.tar.gz; cd paml3.0a; make install' vnallCommand 'cd /usr/local/paml/bin; ls' # Need to compile un-optimized Matt, I tried compiling the PAML programs myself and got the same problem when I used option -O3. They work fine when compiled with no optimization option at all, though. I have put working binaries in ~johnson/bin, perhaps you could just use those to replace the ones at /usr/local/paml/bin. I'm emailing the author of PAML about some other stuff so I'll mention the problem with segmentation faults at the same time. Cheers, Toby. 
vnallbgCommand 'cdi; RM -r paml3.0a' vnallbgCommand 'cdi; setenv CFLAGS " "; setenv PREFIX /usr/local/paml; tar zxf ~matt/autoconf/paml3.0a.tar.gz; cd paml3.0a; make install' ############################################################ Sat Oct 7 07:45:47 PDT 2000 ############################################################ (1) Letting 'bhdemo' mount vn disks bhdemo.physics.ubc.ca 142.103.234.31 # As matt@vnfe1 etc # updated hosts.allow vnDistEtc hosts.allow vnallbgCommand 'killall -HUP inetd' # As root@bhdemo mkdir -p /d/vnfe1/home mkdir -p /d/vnfe1/home2 mkdir -p /d/vnfe2/home mkdir -p /d/vnfe2/home2 mkdir -p /d/vnfe3/home mkdir -p /d/vnfe3/home2 /etc/fstab mount -a # not working, nfs not enabled ############################################################ Mon Oct 9 20:19:47 PDT 2000 ############################################################ (1) New account for Daniel Steck nu cat<dsteck dsteck:x:9030:9000:Daniel Steck:/d/vnfe1/home/dsteck:/bin/tcsh END vnNewUsers dsteck etc; sola; vs cd Blurbs cp johnson dsteck etc TODO # 0.iBvnmZraj.o vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~dsteck; echo "dsteck@george.ph.utexas.edu" > .forward; chown dsteck.other .forward; ls -al; cat .forward' ############################################################ Tue Oct 10 21:10:40 PDT 2000 ############################################################ (1) Attempting gnuplot update via copy of files from bh1 # As root@vnfe1 whereis gnuplot | after \: /usr/bin/gnuplot /usr/share/gnuplot.gih /usr/man/man1/gnuplot.1.bz2 ls -lt `whereis gnuplot | after \:` mkdir -p /home/matt/system/vn/image/master/gnuplot.3.7p0.1 cd !$ cp /usr/bin/gnuplot usr-bin-gnuplot cp /usr/share/gnuplot.gih usr-share-gnuplot.gih cp /usr/man/man1/gnuplot.1.bz2 usr-man-man1-gnuplot.1.bz2 # Coded vn-gnuplot-update vnallbgCommand vn-gnuplot-update # Probably not a good idea, ton of simultaneous scp's ! 
vnallCommand 'ls -lt `whereis gnuplot | after \:`' vnallCommand 'ps -elf | grep gnuplot; killall vn-gnuplot-update; ps -elf | grep gnuplot' vnallCommand vn-gnuplot-update vnallCommand 'pstree | grep gnuplot | grep -v grep' vn1, vn6, vn7, vn10, vn12 vnallbgCommand 'kill -9 `ps -elf | grep vn-gnuplot-update | grep -v grep | nth 4` ' ############################################################ Thu Oct 12 15:48:56 PDT 2000 ############################################################ (1) New account for Doug James (long overdue!) IAM Grad Student (Pai supervisor CPSC) nu cat<djames djames:x:9040:9000:Doug L. James:/d/vnfe1/home/djames:/bin/tcsh END vnNewUsers djames etc; sola; vs cd Blurbs cp dsteck djames etc TODO # GfxujhVPJEPuo vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~djames; echo "djames@iam.ubc.ca" > .forward; chown djames.other .forward; ls -al; cat .forward' ############################################################ Sun Oct 15 12:49:13 PDT 2000 ############################################################ (1) PGI compilers seem to have died (did get mail!) 
vnfeCommand '/bin/su adm -c "/usr/local/pgi/linux86/bin/lmgrd.rc start"' # No go # /usr/local/pgi/linux86/bin/lmgrd.rc /usr/local/pgi flexlm.log /usr/local/pgi/linux86/bin/lmgrd.rc stop vnfeCommand '/bin/su adm -c "/usr/local/pgi/linux86/bin/lmgrd.rc start"' # As root@vnfe1 su adm cd /usr/local/pgi /usr/local/pgi/linux86/bin/lmgrd.rc start # Try changing port /usr/local/pgi/license.dat # Nope 7496 -> 7497 SERVER vnfe1.physics.ubc.ca 0090278d4a4c 7496 SERVER vnfe2.physics.ubc.ca 0090278d466a 7496 SERVER vnfe3.physics.ubc.ca 0090278d4706 7496 DAEMON pgroupd /usr/local/pgi/linux86/bin/pgroupd FEATURE pghpf-linux86 pgroupd 3.100 31-dec-0 2 2B381061CB494D50CD0D \ VENDOR_STRING=109967:16 ck=189 FEATURE pgf90-linux86 pgroupd 3.100 31-dec-0 2 FB18B0F189F8CE0C1485 \ VENDOR_STRING=109967:16 ck=44 FEATURE pgf77-linux86 pgroupd 3.100 31-dec-0 2 2BF8F0D19592B91903D6 \ VENDOR_STRING=109967:16 ck=195 FEATURE pgcc-linux86 pgroupd 3.100 31-dec-0 2 BBD840913A0F2F8A1AA2 \ VENDOR_STRING=109967:16 ck=222 FEATURE pgCC-linux86 pgroupd 3.100 31-dec-0 2 3BD880911A0F4F2ADA82 \ VENDOR_STRING=109967:16 ck=253 FEATURE pgprof pgroupd 3.100 31-dec-0 2 BBA86061A846172A118C \ VENDOR_STRING=109967:16 ck=31 FEATURE pgdbg pgroupd 3.100 31-dec-0 2 3B78B0E13EB972ACFE6E \ VENDOR_STRING=109967:16 ck=244 # Stop cron jobs # Stop pgroupd ? # Yup ... 
might have done the trick vnfeCommand 'netstat -a | grep 7496; netstat -a | grep 7496 | wc' # SUMMARY # To restart PGI compilers (1) Stop lmgrd daemon vnfeCommand '/usr/local/pgi/linux86/bin/lmgrd.rc stop' (2) Ensure that 'pgroupd' daemon is stopped as well, try regular 'kill', but may have to 'kill -9' jj pgroupd ssh vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start ssh vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start ssh vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start ############################################################ Sun Oct 15 15:25:34 PDT 2000 ############################################################ (1) Installing Java as per Doug James' request Thanks for the account. The current JavaTM 2 SDK, Standard Edition Version 1.3.0 for Linux (Intel x86) is available at http://java.sun.com/j2se/1.3/ in either tar or Redhat rpm formats. Please install at your convenience anytime in the next few weeks. # As matt@laplace cd /usr2/people/matt/system/UNIX_SETUP/linux_other_src scp matt@dsl105.net.ubc.ca:/home/matt/.netscape/j2sdk-1_3_0-linux-rpm.sh . vnallbgCommand 'cdi; test -d RPMS || mkdir RPMS' vnallbgCommand 'cdi; ls -ltd RPMS' vnallbgCommand 'cdi; cd RPMS; scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/UNIX_SETUP/linux_other_src/j2sdk-1_3_0-linux-rpm.sh .' vnallCommand 'cdi; cd RPMS; sh ./j2sdk-1_3_0-linux-rpm.sh' vnallCommand 'cdi; cd RPMS; sh ./j2sdk-1_3_0-linux-rpm.sh' # Unpacked on vnfe1, vnfe2, vnfe3 ... 
# as root@vnfe1 cd /var/tmp/install/RPMS foreach i (`iota 64`) scp j2sdk-1_3_0-linux.rpm vn${i}:/var/tmp/install/RPMS ssh vn${i} 'ls -lt /var/tmp/install/RPMS' end # As root@vnfe1 cd /var/tmp/install/RPMS rpm -iv j2sdk-1_3_0-linux.rpm error: failed dependencies: glibc >= 2.1.2-11 is needed by jdk-1.3-fcs ############################################################ Sat Oct 21 08:03:42 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-10-21-0803.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-10-21-0803 # Accumulating October's usage in /home/matt/system/vnshadow/vnAccount.10 ############################################################ Sat Oct 21 08:03:42 PDT 2000 ############################################################ (1) Second try at installing new JDK as per Doug James request Yes, you don't want to upgrade glibc. Sorry for not looking into this for you. I believe a better choice exists for the mandrake 6.1 install, namely IBM's Java 2 SDK, v1.3: http://www.ibm.com/java/jdk/linux130/ http://www.ibm.com/java/jdk/linux130/requirements.html Mandrake 6.1 satisfies the requirements as is, X11R6 3.3.x GNU C Runtime Library (glibc) version 2.1 or greater Linux kernel 2.2.x I've gone ahead and downloaded it for you (so you don't have to register/logon/download), and placed both rpm and tgz formats in my home directory: /d/vnfe1/home/djames/IBMJava2-SDK-1_3-1_1_i386.rpm /d/vnfe1/home/djames/IBMJava2-SDK-13.tgz If you experience any installation problems please let me know. # As root@vnfe1 cd /var/tmp/install/RPMS cp /d/vnfe1/home/djames/IBMJava2-SDK-1_3-1_1_i386.rpm . 
rpm -iv IBMJava2-SDK-1_3-1_1_i386.rpm IBMJava2-SDK-1.3-1.1 unpacking of archive failed: cpio: Bad magic # NOPE rpm --query IBMJava2-SDK-1_3-1_1_i386 cp /d/vnfe1/home/djames/IBMJava2-SDK-13.tgz . tar zxf *tgz cd tar: Skipping to next file header gzip: stdin: invalid compressed data--crc error tar: Child returned status 1 tar: Error exit delayed from previous errors # Get it myself http://www.ibm.com/java/jdk/linux130/ # Select download, filled out the freaking form /home/matt/IBMJava2-SDK-1.3-1.1.i386.rpm /home/matt/IBMJava2-JRE-1.3-1.1.i386.rpm rpm -q -l -p IBMJava2-SDK-1.3-1.1.i386.rpm # Works on ALL versions scp matt@dsl105.net.ubc.ca:/home/matt/IBMJava2-JRE-1.3-1.1.i386.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/IBMJava2-SDK-1.3-1.1.i386.rpm . rpm -iv IBMJava2-SDK-1.3-1.1.i386.rpm IBMJava2-SDK-1.3-1.1 #rpm -iv IBMJava2-JRE-1.3-1.1.i386.rpm # Conflicts rpm -iv IBMJava2-JAAS-1.3-1.1.i386.rpm rpm -iv IBMJava2-JAVACOMM-1.3-1.1.i386.rpm rpm -q IBMJava2-SDK-1.3-1.1 rpm -q IBMJava2-JAAS-1.3-1.1 rpm -q IBMJava2-JAVACOMM-1.3-1.1 ############################################################ Thu Oct 12 15:48:56 PDT 2000 ############################################################ (1) New account for Roger Miller (grad student) (Rob Kiefl PHYS) nu cat<miller miller:x:9041:9000:Roger Miller:/d/vnfe1/home/miller:/bin/tcsh END vnNewUsers miller etc; sola; vs cd Blurbs cp dsteck miller etc # 4o9eP/CY6aEVk vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~miller; echo "miller@physics.ubc.ca" > .forward; chown miller.other .forward; ls -al; cat .forward' ############################################################ Fri Oct 27 08:56:15 PDT 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-10-27-0856.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf 
Rtop.2000-10-27-0856 # Accumulating October's usage ... cd /home/matt/system/vnshadow/Rtop.2000 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-10-21-0803.tar.gz tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-10-27-0856.tar.gz cd Rtop.2000-10-21-0803; mv * ..; cd ..; rmdir Rtop.2000-10-21-0803 cd Rtop.2000-10-27-0856; mv * ..; cd ..; rmdir Rtop.2000-10-27-0856 cd /home/matt/system/vnshadow mkdir Rtop.2000.10 cd Rtop.2000 LS | grep '2000:10' | pre cp | post ../Rtop.2000.10 | csh cd /home/matt/system/vnshadow/Rtop.2000.10 vnAccount mv ../vnAccount.10 ../vnAccount.10.O mkdir ../vnAccount.10 LS | grep -v '^2000' | pre MV | post '../vnAccount.10' | csh vnAccount -p 25 LS | grep -v '^2000' | pre MV | post '../vnAccount.10' | csh cd .. Rcp vnAccount.10 cd vnshadow/Rtop.2000 LS > /tmp/LS2000 wc /tmp/LS2000 48975 48975 930525 /tmp/LS2000 head -2 /tmp/LS2000; tail -2 /tmp/LS2000 2000:01:01:0000.47 2000:01:01:0008.33 2000:10:27:0837.03 2000:10:27:0846.58 vnAccount cd .. ls mv vnAccount.O vnAccount.OO mv vnAccount vnAccount.O mkdir vnAccount back LS | grep -v '^2000' | pre MV | post '../vnAccount' | csh cd .. scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount . 
############################################################ Tue Nov 7 13:43:26 PST 2000 ############################################################ (1) New account for Mike Plischke (Professor and Chair, Dept of Physics, SFU) nu cat<plischke plischke:x:9042:9000:Michael Plischke:/d/vnfe1/home/plischke:/bin/csh END vnNewUsers plischke etc; sola; vs cd Blurbs cp dsteck plischke etc # GgIdgzro/Cusg vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~plischke; echo "plischke@sfu.ca" > .forward; chown plischke.other .forward; ls -al; cat .forward' ############################################################ Tue Nov 7 14:02:45 PST 2000 ############################################################ (1) Archiving Rtop files, generating statistics cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-11-07-1401.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-11-07-1401 cd /home/matt/system/vnshadow/Rtop.2000 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-11-07-1401.tar.gz cd Rtop.2000-11-07-1401; mv * ..; cd ..; rmdir Rtop.2000-11-07-1401 cd /home/matt/system/vnshadow mkdir Rtop.2000.10 cd Rtop.2000 LS | grep '2000:10' | pre CP | post ../Rtop.2000.10 | csh cd /home/matt/system/vnshadow/Rtop.2000.10 RM -rf ../vnAccount.10.O mv ../vnAccount.10 ../vnAccount.10.O mkdir ../vnAccount.10 vnAccount -p 25 LS | grep -v '^2000' | pre MV | post '../vnAccount.10' | csh cd .. Rcp vnAccount.10 cd vnshadow/Rtop.2000 LS > /tmp/LS2000 wc /tmp/LS2000 50626 50626 961894 /tmp/LS2000 head -2 /tmp/LS2000; tail -2 /tmp/LS2000 2000:01:01:0000.47 2000:01:01:0008.33 2000:11:07:1338.36 2000:11:07:1348.51 vnAccount cd .. ls RM -r vnAccount.OO mv vnAccount.O vnAccount.OO mv vnAccount vnAccount.O mkdir vnAccount back LS | grep -v '^2000' | pre MV | post '../vnAccount' | csh cd .. scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount . 
# PROBLEMS with repeated process numbers! Suqin's total usage # actually going DOWN! ############################################################ Wed Nov 8 11:37:34 PST 2000 ############################################################ (1) Updating wu-ftpd per Dan Steck's warning (advisory MDKSA-2000:014) Package name wu-ftpd Date July 2nd, 2000 Advisory ID MDKSA-2000:014 Affected versions 6.0, 6.1, 7.0, 7.1 Urgency Essential Problem Description: Wu-ftpd is vulnerable to a very serious remote attack in the SITE EXEC implementation. Because of user input going directly into a format string for a *printf function, it is possible to overwrite important data, such as a return address, on the stack. When this is accomplished, the function can jump into shellcode pointed to by the overwritten eip and execute arbitrary commands as root. While exploited in a manner similar to a buffer overflow, it is actually an input validation problem. Anonymous ftp is exploitable making it even more serious as attacks can come anonymously from anywhere on the internet. This update also fixes the setproctitle() vulnerability which involves a missing character-formatting argument in setproctitle(), a call which sets the string used to display process identifier information. 
Updated Packages: Please upgrade to the following packages: Linux-Mandrake 6.0: b4340d1007f5128d5d80502007c11a17 6.0/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm bb37dbaf5f9fc3953c2869592df608c9 6.0/SRPMS/wu-ftpd-2.6.0-7mdk.src.rpm Linux-Mandrake 6.1: 89467e25e432271892aea433b613b4f7 6.1/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm bb37dbaf5f9fc3953c2869592df608c9 6.1/SRPMS/wu-ftpd-2.6.0-7mdk.src.rpm Linux-Mandrake 7.0: 7e240d30b2e8cba1ba0c3dc59908aef7 7.0/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm bb37dbaf5f9fc3953c2869592df608c9 7.0/SRPMS/wu-ftpd-2.6.0-7mdk.src.rpm Linux-Mandrake 7.1: 2b83dcb120012f1009e707398b5f4dc4 7.1/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm bb37dbaf5f9fc3953c2869592df608c9 7.1/SRPMS/wu-ftpd-2.6.0-7mdk.src.rpm References: CERT Advisory CA-2000-13: Two Input Validation Problems in FTPD Upgrade: To upgrade automatically, use MandrakeUpdate. If you want to upgrade manually, download the updated package from one of our FTP server mirrors and uprade with "rpm -Uvh package_name". Verification: Please verify these md5 checksums of the updates prior to upgrading to ensure the integrity of the downloaded package. You can do this by running the md5sum program on the downloaded package by using "md5sum package.rpm". These packages are also signed by the Linux Mandrake Security Team for security. Use our GnuPG key to verify the packages with RPM. You can verify each package with the "rpm --checksig package_name" command. You can also verify the md5sums of each package using "rpm --checksig --nogpg package_name". Please note that in order to verify the GnuPG keys, you must have GnuPG installed, our public key added to your public key ring, and an RPM version of 3.0 or higher. 
(2) Running 6.1 on cluster Linux-Mandrake 6.1: 89467e25e432271892aea433b613b4f7 6.1/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm bb37dbaf5f9fc3953c2869592df608c9 6.1/SRPMS/wu-ftpd-2.6.0-7mdk.src.rpm http://sunsite.ualberta.ca/pub/Mirror/Linux/mandrake/updates/6.1/RPMS/wu-ftpd-2.6.0-7mdk.i586.rpm Downloaded wu-ftpd-2.6.0-7mdk.i586.rpm # As matt@vnfe1 cd RPM scp matt@dsl105.net.ubc.ca:/home/matt/wu-ftpd-2.6.0-7mdk.i586.rpm . md5sum wu-ftpd-2.6.0-7mdk.i586.rpm 89467e25e432271892aea433b613b4f7 wu-ftpd-2.6.0-7mdk.i586.rpm # rpm -Uvh wu-ftpd-2.6.0-7mdk.i586.rpm # As root@vn4 cd /var/tmp/install/RPMS cp ~matt/RPM/wu-ftpd-2.6.0-7mdk.i586.rpm . rpm -Uvh wu-ftpd-2.6.0-7mdk.i586.rpm rpm -q wu-ftpd wu-ftpd-2.6.0-7mdk vnallCommand 'cdi; cd RPMS; pwd; ls; cp ~matt/RPM/wu-ftpd-2.6.0-7mdk.i586.rpm .; ls' vnallCommand 'cdi; cd RPMS; rpm -Uvh wu-ftpd-2.6.0-7mdk.i586.rpm; rpm -q wu-ftpd' laplace 21> ftp vnfe1 Connected to vnfe1.physics.ubc.ca. 220 vnfe1.physics.ubc.ca FTP server (Version wu-2.6.0(1) Thu Jun 29 02:42:43 EDT 2000) ready. Name (vnfe1:matt): laplace 22> ftp vn64 Connected to vn64.physics.ubc.ca. 220 vn64.physics.ubc.ca FTP server (Version wu-2.5.0(1) Sat May 22 11:15:07 GMT 1999) ready. 
############################################################ Wed Nov 15 10:20:07 PST 2000 ############################################################ CRASH_75 (1) vn20 down Roman and Bian running ############################################################ Mon Nov 20 14:50:50 PST 2000 ############################################################ (1) Archiving Rtop files, generating statistics cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-11-20-1451.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-11-20-1451 cd /home/matt/system/vnshadow/Rtop.2000 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-11-20-1451.tar.gz cd Rtop.2000-11-20-1451; mv * ..; cd ..; rmdir Rtop.2000-11-20-1451 cd /home/matt/system/vnshadow mkdir Rtop.2000.11 cd Rtop.2000 LS | grep '2000:11' | pre CP | post ../Rtop.2000.11 | csh cd system/vnshadow/Rtop.2000.11 mkdir ../vnAccount.11 vnAccount -p 25 # TODO LS | grep -v '^2000' | pre MV | post '../vnAccount.11' | csh cd .. Rcp vnAccount.11 ############################################################ Thu Nov 23 13:53:08 PST 2000 ############################################################ (1) New account for Stephan de Wekker (Grad Student, EOS, Douw Steyn supervisor) nu cat<sdewekker sdewekker:x:9043:9000:Stephan de Wekker:/d/vnfe1/home/sdewekker:/bin/tcsh END vnNewUsers sdewekker etc; sola; vs cd Blurbs cp dsteck sdewekker TODO etc # aEV7RCLT7U9ps vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~sdewekker; echo "sdewekker@eos.ubc.ca" > .forward; chown sdewekker.other .forward; ls -al; cat .forward' ############################################################ Fri Nov 24 07:17:39 PST 2000 ############################################################ (1) Something has changed with 'top' ? Unknown HZ value (0)! Assume 100 http://lisoleg.aka.citf.net/proc.txt (1) Jiffie: internal timeunit for the kernel. 
On the i386 1/100s, on the Alpha 1/1024s. See the HZ define in /usr/include/asm/param.h for the exact value on your system. Apparently somewhere between 245 and 248 days clock rolls over #ifndef _ASMi386_PARAM_H #define _ASMi386_PARAM_H #ifndef HZ #define HZ 100 #endif . . . ---------------------------------------------------------------------- > The default value of HZ is 100hz. Since the DEF_PRIORITY in sched.h > is 20, that means a processor intensive process gets a maximum of > 200ms or 1/5th of a second. Two intensive processes can hog the > processor for 0.4 seconds and so on. That's a long time when one > is running multimedia apps, games, or burning a CD. I presume that > there are good reasons to keep HZ at it's present value, but I'd like > to find out more about it. Can anyone point me to more information or > perhaps a previous discussion of this subject? Perhaps you noticed the slashdot.org posters reporting greatly improved responsiveness? You can and should change HZ, but might cause trouble. If you set the value to 1024 (like the Alpha uses) your clock rolls over in about 50 days. Maybe you reboot every month and do not care. Procps (ps, top, w, etc.) will break. You can recompile it, or you can get a new version that ought to tolerate HZ changes. http://www.cs.uml.edu/~acahalan/linux/procps-990103.tar.gz Allowed HZ values are: 50 60 100 128 256 1000 1024 Let me know if you need another. Oh, "top" CPU usage display is somewhat fixed too. Looks like has something to do with the length of time systems have been up. 
---------------------------------------------------------------------- ############################################################ Fri Nov 24 17:45:18 PST 2000 ############################################################ (1) New account for Bela Joos (Visiting Prof at SFU Physics) nu cat<joos joos:x:9044:9000:Bela Joos:/d/vnfe1/home/joos:/bin/tcsh END vnNewUsers joos etc; sola; vs cd Blurbs cp dsteck joos TODO etc # aZIXHKFRpX8GU vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~joos; echo "joos@sfu.ca" > .forward; chown joos.other .forward; ls -al; cat .forward' ############################################################ Fri Nov 24 19:20:47 PST 2000 ############################################################ (1) Preparing for shutdown HACKED: vnN test $i = 1 || printf "142.103.237.%d " $i DISABLED: vnallbgCommand STRATEGY: Shut down vn1 ssh root@vn1 '/sbin/shutdown -y -g0 -h' Shut down vn2-vn64 vnnallbgCommand '/sbin/shutdown -y -g0 -h' Shut down vnfe1-vnfe3 vnfeallbgCommand '/sbin/shutdown -y -g0 -h' RESTORE: vnN, vnallbgCommand After shutdown of nodes, following did not reach power-down state vn3 vn5 vn18 vn22 vn33 vn44 vn45 vn49 vn52 (NFS) vn55 vn59 Also, problems with monitor/cable and vnfe1 (had to bring up dirty etc.) ############################################################ Sat Nov 25 17:11:27 PST 2000 ############################################################ (1) Power restored in machine room, new cable fixes monitor problem, vnfe3 recheck forced, will probably be the case for most of the nodes as well. Sat Nov 25 17:38:01 PST 2000 Front-ends back, bringing up nodes Powered up nodes, some noisiness in fans? 
vnallCommand ntptimeset # Something wrong with vn21 Needed manual fsck /dev/hda1 # Something wrong with vn28 OK vnallCommand 'df' vnallCommand 'grep -i Proce /proc/cpuinfo' vnallCommand 'ps -elf | grep ntpd | grep -v grep' OK # System looks OK ############################################################ Sat Nov 25 18:51:51 PST 2000 ############################################################ (1) RTOP Prompting for password on vn54, vn56, vn58, vn59, vn60 vnfe1:/home wasn't mounted ############################################################ Sun Nov 26 05:32:42 PST 2000 ############################################################ vn5 down about an hour ago See README.CRASH: CRASH_76 No apparent reason, but had to manually run fsck /dev/hda1 ############################################################ Sun Nov 26 08:42:54 PST 2000 ############################################################ (1) Long-standing problem: # Daily log rotate (cron/at) errors occured while rotating /var/lib/mysql/mysql.log stat of /var/lib/mysql/mysql.log failed: No such file or directory cd /etc/cron.daily grep -i rotate * logrotate:/usr/sbin/logrotate /etc/logrotate.conf # Add 'missingok' to /etc/logrotate.d/mysql mkdir /d/vnfe1/home/matt/system/vn/image/master/etc/logrotate.d cd !$ scp root@vnfe1:/etc/logrotate.d/mysql . cp mysql mysql.orig # Added missingok vnallbgCommand 'cd /etc/logrotate.d/; scp matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/logrotate.d/mysql .' vnallCommand 'grep missing /etc/logrotate.d/mysql' TODO: FIX THIS UP PROPERLY mysql LOG FILES CURRENTLY INCLUDE HOSTNAME ############################################################ Sun Nov 26 17:54:46 PST 2000 ############################################################ vn43 down about two hours ago See README.CRASH: CRASH_77 # sdewekker? # Had an incredible number of jobs "running" on the cluster.
# Clearly needs to be a little more careful about his # mpirun'ing ############################################################ Sat Dec 2 06:52:00 PST 2000 ############################################################ idle 29 roman 20 jcohena 20 lothar 16 bian 10 mcase 8 plischke 6 tzenova 4 ghlim 4 daub 4 minghe 3 fransp 2 suqin 1 fengxs 1 ############################################################ Sat Dec 2 12:46:51 PST 2000 ############################################################ vnallbgCommand 'ps -elf | grep roman | grep -v grep' vnallbgCommand 'ps -elf | grep roman | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep jcohena | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep lothar | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep bian | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep mcase | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep plischke | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep tzenova | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep ghlim | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep daub | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep minghe | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep fransp | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep suqin | grep -v grep | nth 4 | pre kill | csh' vnallbgCommand 'ps -elf | grep fengxs | grep -v grep | nth 4 | pre kill | csh' Shut down vn2-vn64 vnnallbgCommand '/sbin/shutdown -y -g0 -h' OK Shut down vnfe1-vnfe3 vnfeallbgCommand 'umount -a -f -t nfs; df' ssh vnfe3 '/sbin/shutdown -y -g0 -h' ssh vnfe2 '/sbin/shutdown -y -g0 -h' /sbin/shutdown -y -g0 -h ############################################################ Sat Dec 2 16:38:00 PST 2000 ############################################################ # Front ends up # Nodes powered on vnPing # No 
response from vn28 # Appears to be hard-drive problem ... installing spare drive SEE README.CRASH CRASH_78 ############################################################ Sun Dec 3 06:29:34 PST 2000 ############################################################ [root@vnfe1]# date; down Sun Dec 3 06:29:22 PST 2000 vn20 down 8:59 SEE README.CRASH CRASH_79 ############################################################ Wed Dec 6 18:11:00 PST 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-12-06-1811.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf Rtop.2000-12-06-1811 ############################################################ Wed Dec 13 10:40:14 PST 2000 ############################################################ (1) Michael Mitton reports problems with pgCC Try installing bbh_grace in place Library and 'wave' build fine. ############################################################ Thu Dec 14 04:03:52 PST 2000 ############################################################ [root@vnfe1]# date; down Thu Dec 14 04:04:08 PST 2000 vn23 down 6:59 SEE README.CRASH CRASH_80 # Suqin's job?? 
(Probably has happened before) ############################################################ Thu Dec 14 08:16:26 PST 2000 ############################################################ Flat out power consumption cd /home/matt/debug/rnpl/wave2d Mpirun 24 wave id_12_5000 Mpirun 12 wave id_12_5000 # UPS loads 64 55 55 70 vnallbgCommand 'killall wave; killall Lock;' ############################################################ Sun Dec 17 09:09:07 PST 2000 ############################################################ (1) Downloading NWChem Binary Distribution as per FAX from Theresa Windus MSIN K1-83, PO Box 999 High Performance Computational Chemistry Group William R Wiley, Environmental Molecular Sciences Laboratory Pacific Northwest National Laboratory Richland WA 99352-0999 nwchem-support@emsl.pnl.gov http://www.emsl.pnl.gov:2080/docs/nwchem/download.html nwc_bin p4FgG8md # As matt@laplace http://www.emsl.pnl.gov:2080/docs/nwchem/download.html # Downloading binary http://www.emsl.pnl.gov/auth/nwc_down/cgi-bin/nwc_down.pl # Contents of PLEASE_REAMDME The file names identify the NWChem Target, operating system , and possibly processor type: All files of any prefix pertain to the same set of files. The suffix identifies: .tar.Z --> unix compressed tar file .tgz --> GNU's gzip compressed tar file .Manifest --> List of files in any of the tar files For example: name identifier contents ---------------------------------------------------------------------------------- QA-4.0.* Contains quality assurance test inputs doc-4.0.* Contains the manuals (Note that the best place to get the documentation is from the web pages since changes may need to be made.) nwchem-4.0.JAVA.* The Java based problem reporting tool. OS independent nwchem-4.0-SOLARIS-ultrasparc.* Generic ultrasparc Solaris build nwchem-4.0-LINUX.* Generic linux build with g77 Files with the identifier "static" in them are builds with static libraries. 
Note that on the Sun, some Unix libraries are not linked in statically. If you have trouble down-loading these files please send mail to nwchem-support@emsl.pnl.gov describing the date/time you tried to down-load the files and the problem you are having. Remember that some browsers will automatically uncompress things and not change the name of the file. This is a function of which browser you use, the release of that browser, and your specific configuration parameters. If possible use the "save link as" functionality to try to avoid this, sometimes it works sometimes not. # Download nwchem-4.0-LINUX.* nwchem-4.0.JAVA.* QA-4.0.* doc-4.0.* pwd ; ls -lt /usr2/people/matt/autoconf/nwchem total 54160 -rw-r--r-- 1 matt choptuik 453620 Dec 17 09:26 doc.tgz -rw-r--r-- 1 matt choptuik 3571 Dec 17 09:26 doc.Manifest -rw-r--r-- 1 matt choptuik 7069830 Dec 17 09:23 QA.tgz -rw-r--r-- 1 matt choptuik 7142 Dec 17 09:21 QA.Manifest -rw-r--r-- 1 matt choptuik 10417801 Dec 17 09:21 nwchem-4.0-LINUX.static.tgz -rw-r--r-- 1 matt choptuik 9733827 Dec 17 09:18 nwchem-4.0-LINUX.tgz -rw-r--r-- 1 matt choptuik 1353 Dec 17 09:16 nwchem-4.0.JAVA.Manifest -rw-r--r-- 1 matt choptuik 21159 Dec 17 09:15 nwchem-4.0.JAVA.tgz # Need to re-tar with nicer set of permissions ... # As matt@vnfe1 cda scp -r matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/autoconf/nwchem . foreach f (*tgz) tar zxf $f end # May not be a great idea, some of the packages, at least, are installing # in-place. # From user documentation ... If running in parallel across multiple machines, you should consider applying this patch to your kernel to boost the performance of TCP/IP http://www.icase.edu/coral/LinuxTCP.html # Java NWChem works on bh1, but not on vnfe1 (would need to install JDK or # equivalent on vn machines) /d/vnfe1/home/matt/autoconf/nwchem/QA/HOW-TO-RUN-TESTS The Quality Assurance (QA) tests are designed to test most of the functionality of NWChem. 
As such, it is useful to run at least some of the tests when first installing NWChem at a site. It is imperative to run these tests when porting to a new platform. The directions given below for running the tests are for systems without a batch system. If you have a batch system, check out the contrib directory to see if there is an appropriate batch submission script. You will then need to run each of the tests separately and check the results (the nwparse.pl script can be used for the quantum (QM) and pspw tests for this purpose). # MWC: But note omission of NWCHEM_BASIS_LIBRARY!! Here are some steps and notes on running the QA tests: 1) Set the environment variable NWCHEM_EXECUTABLE to the executable you want to use, e.g. % setenv NWCHEM_EXECUTABLE \ % $NWCHEM_TOP/bin/${NWCHEM_TARGET}_${NWCHEM_TARGET_CPU}/nwchem 2) If you compiled without MPI (this is the default way to build NWChem), you will need to: a) Set the environment variable PARALLEL_PATH to the location of the parallel program, e.g. % setenv PARALLEL_PATH \ % $NWCHEM_TOP/bin/${NWCHEM_TARGET}_${NWCHEM_TARGET_CPU}/parallel b) Run the QM tests sequentially using the doqmtests script. Note that you may want to comment out the largest tests at the bottom of the doqmtests file on slower machines or machines without much memory. Also, files will be placed in the $NWCHEM_TOP/QA/testoutputs directory. You may wish to clean out this directory. % doqmtests >& doqmtests.log & c) Check the doqmtests.log file for potential problems. While running, the test scripts place files in the $NWCHEM_TOP/QA/testoutputs directory. You may wish to clean out this directory after checking that everything is working. If a job did not work, the output can be found in the $NWCHEM_TOP/QA/testoutputs directory.
If the problem seems significant and/or you are unsure whether NWChem performed the calculation correctly, please send a message to nwchem-support@emsl.pnl.gov with details about your computer, the environment variables that were set when you compiled NWChem, and the output of the calculation that you are concerned about. d) Run the QM tests in parallel by editing the doqmtests script so that "procs #" is placed after the runtests.unix commands (substituting in the number of processors that you want to use for #). E.g. runtests.unix procs 2 h2o_dk u_sodft cosmo_h2o ch5n_nbo h2s_finite e) Again check the log for potential problems. f) Run most of the molecular dynamics (MD) tests using the runtest.md script. Note that this script assumes that you have a /tmp directory and that you want to use 2 processes. Both of these may be changed. % runtest.md >& runtest.md.log & g) Check the log (runtest.md.log) for potential problems. 3) If you compiled with MPI, you will need to a) Set the environment variable MPIRUN_PATH to the location of mpirun if it is not in your path, e.g. % setenv MPIRUN_PATH /usr/local/bin/mpirun b) If the mpirun processor definition option is not -np, you will need to set the environment variable MPIRUN_NPOPT to the appropriate flag, e.g. % setenv MPIRUN_NPOPT -n c) Run the doqmtests and runtest.md scripts as described above, but first edit those files to substitute "runtests.mpi.unix" for "runtests.unix" and "runtest.unix" d) Check the log for potential problems. 
#----------------------------------------------------------------------- # TESTING #----------------------------------------------------------------------- setenv NWCHEM_EXECUTABLE /d/vnfe1/home/matt/autoconf/nwchem/bin/nwchem setenv PARALLEL_PATH /d/vnfe1/home/matt/autoconf/nwchem/bin/parallel setenv NWCHEM_BASIS_LIBRARY /d/vnfe1/home/matt/autoconf/nwchem/usr.local.lib.nwchem/library # In matt@vnfe1:~/autoconf/nwchem cp -r usr.local.lib.nwchem lib chmod a+rx bin doc etc lib classes java_support_tool QA chmod -R a+r bin doc etc lib classes java_support_tool QA # Coding vnInstallNWchem to install bin doc etc lib classes java_support_tool # in /usr/local/nwchem ssh root@vnfe1 '/bin/rm -r /usr/local/nwchem' ssh root@vnfe1 vnInstallNWchem # Added following lines to master csh.cshrc # /usr/local/nwchem to path ? (no) setenv NWCHEM_EXECUTABLE /usr/local/nwchem/bin/nwchem setenv PARALLEL_PATH /usr/local/nwchem/bin/parallel setenv NWCHEM_BASIS_LIBRARY /usr/local/nwchem/lib/library vnDistEtc csh.cshrc printenv NWCHEM_EXECUTABLE printenv PARALLEL_PATH printenv NWCHEM_BASIS_LIBRARY # As matt@laplace cd /tmp cp -r /usr/local/nwchem/QA . # Looks like tests run OK ssh root@vnfe1 '/bin/rm -r /usr/local/nwchem' vnallbgCommand 'vnInstallNWchem' vnallCommand 'ls -lt /usr/local/nwchem' vnallbgCommand 'cd /tmp; cp -r /usr/local/nwchem/QA/tests/auh2o .; cd auh2o; /usr/local/nwchem/bin/nwchem auh2o;' vnallCommand 'cd /tmp/auh2o; ls -lt' # Documentation to 'laplace' # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN/nwchem scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/autoconf/nwchem/doc . ############################################################ Sun Dec 17 10:54:54 PST 2000 ############################################################ (1) Something wrong with vn5 # Can't reboot, get kernel panic trying to mount one of the file systems # Naive attempt to use rescue floppy also unsuccessful. 
# Jason fixed with manual fsck via one of bh machines ############################################################ Wed Dec 20 15:58:01 PST 2000 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2000-12-20-1558.tar.gz # laplace:/usr2/people/matt/system/vnArchive /bin/rm -rf 2000-12-20-1558 cd /home/matt/system/vnshadow/Rtop.2000 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-12-06-1811.tar.gz cd Rtop.2000-12-06-1811 mv * .. cd .. tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2000-12-20-1558.tar.gz cd Rtop.2000-12-20-1558 mv * .. cd .. rmdir Rtop.2000-12-06-1811 Rtop.2000-12-20-1558 cd /home/matt/system/vnshadow/Rtop.2000 LS | grep '2000:11' | pre CP | post ../Rtop.2000.11 | csh mkdir ../Rtop.2000.12 LS | grep '2000:12' | pre CP | post ../Rtop.2000.12 | csh cd /home/matt/system/vnshadow/Rtop.2000.11 vnAccount mv ../vnAccount.11 ../vnAccount.11.O LS | grep -v '^2000' | pre MV | post '../vnAccount.11' | csh cd /home/matt/system/vnshadow/Rtop.2000.12 vnAccount mkdir ../vnAccount.12 LS | grep -v '^2000' | pre MV | post '../vnAccount.12' | csh # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.11 vnAccount.11.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.11 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.12 . 
############################################################ Fri Jan 12 08:42:21 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-01-12-0842.tar.gz # laplace:/usr2/people/matt/system/vnArchive # 4108 4108 78052 cd /home/matt/system/vnshadow/Rtop.2000 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2001-01-12-0842.tar.gz cd Rtop.2001-01-12-0842 mv * .. cd .. rmdir Rtop.2001-01-12-0842 cd /home/matt/system/vnshadow/Rtop.2000 LS | grep '2000:12' | pre CP | post ../Rtop.2000.12 | csh cd /home/matt/system/vnshadow/Rtop.2000.12 vnAccount LS | grep -v '^2000' | pre MV | post '../vnAccount.12' | csh # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.12 vnAccount.12.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.12 . cd /home/matt/system/vnArchive /bin/rm -rf 2000-01-12-0842 ############################################################ Mon Jan 15 12:56:02 PST 2001 ############################################################ (1) PGI compilers not responding vnfe3:/home 100% full ! 5619572 suqin 2050373 fengxs 2012633 wkb 1632453 bian 1613848 jasonz 630775 zheqiong # Killed all of suqin's 'a.out' jobs cd /home/suqin/jason/v_low RM pp*/fort.9 # Restarted PGI compilers ############################################################ Mon Jan 29 18:10:06 PST 2001 ############################################################ (1) Updating bind as per security advisory Downloaded bind-8.2.3-1.2mdk.i586.rpm bind-devel-8.2.3-1.2mdk.i586.rpm bind-utils-8.2.3-1.2mdk.i586.rpm from http://chronos.iut-bm.univ-fcomte.fr/Mandrake/updates/6.1/RPMS/ # As matt@vnfe1 cd RPM scp matt@dsl105.net.ubc.ca:/home/matt/bind-8.2.3-1.2mdk.i586.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/bind-devel-8.2.3-1.2mdk.i586.rpm .
scp matt@dsl105.net.ubc.ca:/home/matt/bind-utils-8.2.3-1.2mdk.i586.rpm . md5sum bind*rpm dc329783fa290dac8cd5a738df8a82d8 bind-8.2.3-1.2mdk.i586.rpm ad9b06a8481ccde39ef1eb7c597d043e bind-devel-8.2.3-1.2mdk.i586.rpm 3df3e8047e198aa0be440308ba74adb6 bind-utils-8.2.3-1.2mdk.i586.rpm # As root@vn35 cd ~matt/RPM/; rpm -Fvh bin*rpm; rpm --query bind vnallbgCommand 'cd ~matt/RPM/; rpm -Fvh bin*rpm; rpm --query bind' vnallCommand 'rpm --query bind' # BH machines 7.0 6d1b647fe11e466a4ee3ef75424da068 bind-8.2.3-1.2mdk.i586.rpm 45d5e366eaf3b2c1cf7c9820e681de1d bind-devel-8.2.3-1.2mdk.i586.rpm b3cc1eed184505ec95e34a9fd78e65e3 bind-utils-8.2.3-1.2mdk.i586.rpm # As matt@bh1 cd RPM scp matt@dsl105.net.ubc.ca:/home/matt/7.0/bind-8.2.3-1.2mdk.i586.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/7.0/bind-devel-8.2.3-1.2mdk.i586.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/7.0/bind-utils-8.2.3-1.2mdk.i586.rpm . bhCommand /etc/security/msec/init.sh bhbgCommand 'cd ~matt/RPM/; rpm -Fvh bin*rpm; rpm --query bind' bhCommand 'rpm --query bind' # LNX machines 7.1 5ab9bf322cecb913c6649540d5819e3d bind-8.2.3-1.2mdk.i586.rpm eac8d15ebdfaeb5add97c8a9e0058fa7 bind-devel-8.2.3-1.2mdk.i586.rpm df714be43fe1f122dcff839c85d00719 bind-utils-8.2.3-1.2mdk.i586.rpm # As matt@lnx1 cd RPM scp matt@dsl105.net.ubc.ca:/home/matt/7.1/bind-8.2.3-1.2mdk.i586.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/7.1/bind-devel-8.2.3-1.2mdk.i586.rpm . scp matt@dsl105.net.ubc.ca:/home/matt/7.1/bind-utils-8.2.3-1.2mdk.i586.rpm . 
lnxCommand /etc/security/msec/init.sh lnxbgCommand 'cd /d/lnx1/home/matt/RPM/; rpm -Fvh bin*rpm; rpm --query bind' lnxCommand 'rpm --query --all | grep -i bind' # bind, bind-devel apparently not installed on these machines # As root@dsl105 cd ~matt/6.1 rpm -Fvh bin*rpm; rpm --query bind ############################################################ Wed Jan 31 10:39:32 PST 2001 ############################################################ (1) New account for Yun-Bo Duan (Irving Ozier's postdoc) nu cat<yduan yduan:x:9045:9000:Yun-Bo Duan:/d/vnfe1/home/yduan:/bin/tcsh END vnNewUsers yduan cd Blurbs cp dsteck yduan vi yduan TODO etc; sola vs # pnwkBfAJjUx/M vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~yduan; echo yduan@physics.ubc.ca > .forward; chown yduan.other .forward; ls -al; cat .forward' ############################################################ Thu Feb 1 09:01:07 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-02-01-0901.tar.gz # laplace:/usr2/people/matt/system/vnArchive # 3547 3547 67393 cd /home/matt/system/vnshadow/ mkdir Rtop.2001 cd Rtop.2000 LS | grep '^2001:01' | pre MV | post ../Rtop.2001 | csh cd ../Rtop.2001 tar zxf /d/vnfe1/home/matt/system/vnArchive/Rtop.2001-02-01-0901.tar.gz cd Rtop.2001-02-01-0901 mv * .. cd .. rmdir Rtop.2001-02-01-0901 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '2001:01' | pre CP | post ../Rtop.2001.01 | csh cd /home/matt/system/vnshadow/ mkdir vnAccount.01.01 cd /home/matt/system/vnshadow/Rtop.2001.01 vnAccount LS | grep -v '^2001' | pre MV | post '../vnAccount.01.01' | csh # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mkdir vnAccount.01.01 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.01 . 
cd /home/matt/system/vnArchive /bin/rm -rf Rtop.2001-02-01-0901 ############################################################ Sun Feb 4 12:43:29 PST 2001 ############################################################ Rebooted vn20 ... see README.CRASH/CRASH_82 vn20's load average at about 60, can still log in, but can't, e.g. cd to ~idle reboot minghe, fransp, ytwang were running ############################################################ Mon Feb 5 13:18:58 PST 2001 ############################################################ (1) vn5 had to be rebooted, some suspicious error messages: Feb 4 23:25:30 vn5 kernel: free_one_pmd: bad directory entry 00000002 Feb 4 23:25:30 vn5 kernel: free_one_pmd: bad directory entry 00000004 Feb 4 23:25:30 vn5 last message repeated 8 times Feb 4 23:25:30 vn5 PAM_pwdb[24504]: (rsh) session closed for user roman Feb 4 23:30:00 vn5 kernel: free_one_pmd: bad directory entry 00000004 Feb 4 23:30:00 vn5 last message repeated 22 times Feb 4 23:30:00 vn5 kernel: free_one_pmd: bad directory entry 00000006 Feb 4 23:30:00 vn5 kernel: free_one_pmd: bad directory entry 00000004 Feb 4 23:30:00 vn5 last message repeated 15 times Feb 4 23:31:45 vn5 sshd[24547]: log: Connection from 142.103.237.225 port 1011 Feb 4 23:31:45 vn5 sshd[24547]: log: RSA authentication for idle accepted. 
Feb 4 23:31:45 vn5 kernel: free_one_pmd: bad directory entry 00000004 Feb 4 23:31:45 vn5 last message repeated 22 times Feb 4 23:31:45 vn5 kernel: free_one_pmd: bad directory entry 00000006 Feb 4 23:31:45 vn5 kernel: free_one_pmd: bad directory entry 00000004 ############################################################ Thu Feb 22 17:41:18 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-02-22-1741.tar.gz # laplace:/usr2/people/matt/system/vnArchive # 3384 3384 64296 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-02-22-1741.tar.gz cd Rtop.2001-02-22-1741 mv * .. cd .. rmdir Rtop.2001-02-22-1741 cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.02 LS | grep '^2001:02' | pre CP | post '../Rtop.2001.02' | csh cd /home/matt/system/vnshadow/Rtop.2001.02 vnAccount mkdir ../vnAccount.01.02 mv `LS | grep -v '^200'` ../vnAccount.01.02 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.02 . ############################################################ ????? 
############################################################ For all users jcohena lothar liam ghlim minghe suqin mcase luisl joos fransp fengxs daub vnallbgCommand 'ps -elf | grep ytwang | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep shuo | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep ghlim | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep yduan | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep jcohena | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep lothar | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep liam | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep ghlim | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep minghe | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep suqin | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep mcase | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep luisl | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep joos | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep fransp | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep fengxs | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep daub | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep idle | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep roman | grep -v grep | nth 4 | pre kill -9 | csh' vnnallbgCommand 'umount -a -f -t nfs; df' Shut down vn1-vn64 vnnallbgCommand '/sbin/shutdown -y -g0 -h' ??? 
Shut down vnfe1-vnfe3 vnfeallbgCommand 'umount -a -f -t nfs; df' ssh vnfe3 '/sbin/shutdown -y -g0 -h' ssh vnfe2 '/sbin/shutdown -y -g0 -h' vnNCommand '/sbin/shutdown -y -g0 -h &' ############################################################ Fri Feb 23 09:20:14 PST 2001 ############################################################ All nodes down ############################################################ Fri Feb 23 13:37:45 PST 2001 ############################################################ Nodes back cat /proc/cpuinfo ntptimeset vn1 vn2 vn3 vn4 vn5 vn6 vn7 vn8 vn9 vn10 vn11 vn12 vn13 vn14 vn15 vn16 vn17 vn18 vn19 vn20 vn21 vn22 vn23 vn24 vn25 vn26 vn27 vn28 vn29 vn30 vn31 vn32 vn33 vn34 vn35 vn36 vn37 vn38 vn39 vn40 vn41 vn42 vn43 vn44 vn45 vn46 vn47 vn48 vn49 vn50 vn51 vn52 vn53 vn54 vn55 vn56 vn57 vn58 vn59 vn60 vn61 vn62 vn63 vn64 vn10, vn13, vn23 need new power supplies BENCHMARKS (1) rnpl/wave2d a doit0 'ssh \!* "cd wave2d/f77; RM *.sdf; wave2df_init w2d_0; time wave2df w2d_0"' a doit1 'ssh \!* "cd wave2d/f77; RM *.sdf; wave2df_init w2d_1; time wave2df w2d_1"' a doit2 'ssh \!* "cd wave2d/f77; RM *.sdf; wave2df_init w2d_2; time wave2df w2d_2"' a doit0 'ssh \!* "cd wave2dPG/f77; RM *.sdf; wave2df_init w2d_0; time wave2df w2d_0"' vnfe1 7.060u 0.130s 0:07.17 100.2% 0+0k 0+0io 240pf+0w vn5 4.170u 0.140s 0:04.36 98.8% 0+0k 0+0io 171pf+0w a doit1 'ssh \!* "cd wave2dPG/f77; RM *.sdf; wave2df_init w2d_1; time wave2df w2d_1"' vnfe1 6.390u 0.120s 0:06.54 99.5% 0+0k 0+0io 330pf+0w vn5 4.230u 0.090s 0:04.47 96.6% 0+0k 0+0io 171pf+0w a doit2 'ssh \!* "cd wave2dPG/f77; RM *.sdf; wave2df_init w2d_2; time wave2df w2d_2"' vnfe1 5.390u 0.380s 0:05.80 99.4% 0+0k 0+0io 800pf+0w vnfe1 3.680u 0.230s 0:04.27 91.5% 0+0k 0+0io 171pf+0w (2) graxi 48x96x10 vnfe1 43.740u 0.100s 0:43.88 99.9% 0+0k 0+0io 446pf+0w vn2 23.980u 0.130s 0:24.27 99.3% 0+0k 0+0io 258pf+0w graxi 96x192x10 vnfe1 59.400u 0.600s 1:00.20 99.6% 0+0k 0+0io 258pf+0w vn2 32.940u 0.310s 0:33.51 99.2% 0+0k 0+0io 258pf+0w 
(3) emkgcnad vnfe1 31.940u 0.080s 0:32.09 99.7% 0+0k 0+0io 485pf+0w vn2 16.580u 0.080s 0:16.66 100.0% 0+0k 0+0io 484pf+0w ############################################################ Fri Feb 23 17:17:20 PST 2001 ############################################################ Upgrade nominally complete vnNCommand "cd ~matt/wave2dPG/f77; RM *.sdf; wave2df_init w2d_0; time wave2df w2d_0" > /tmp/bench 1: 4.140u 0.160s 0:04.33 99.3% 0+0k 0+0io 171pf+0w 2: 4.190u 0.180s 0:04.39 99.5% 0+0k 0+0io 171pf+0w 3: 4.170u 0.190s 0:04.38 99.5% 0+0k 0+0io 171pf+0w 4: 4.080u 0.220s 0:04.30 100.0% 0+0k 0+0io 171pf+0w 5: 4.150u 0.180s 0:04.37 99.0% 0+0k 0+0io 171pf+0w 6: 4.050u 0.220s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 7: 4.110u 0.200s 0:04.32 99.7% 0+0k 0+0io 171pf+0w 8: 4.070u 0.190s 0:04.35 97.9% 0+0k 0+0io 171pf+0w 9: 4.090u 0.180s 0:04.33 98.6% 0+0k 0+0io 171pf+0w 10: 4.160u 0.130s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 11: 4.120u 0.150s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 12: 4.090u 0.200s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 13: 4.100u 0.150s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 14: 4.090u 0.200s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 15: 4.100u 0.170s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 16: 4.120u 0.180s 0:04.32 99.5% 0+0k 0+0io 171pf+0w 17: 4.140u 0.150s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 18: 4.140u 0.160s 0:04.33 99.3% 0+0k 0+0io 171pf+0w 19: 4.110u 0.180s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 20: 4.060u 0.240s 0:04.32 99.5% 0+0k 0+0io 171pf+0w 21: 4.120u 0.160s 0:04.32 99.0% 0+0k 0+0io 171pf+0w 22: 4.130u 0.190s 0:04.34 99.5% 0+0k 0+0io 171pf+0w 23: 4.130u 0.170s 0:04.33 99.3% 0+0k 0+0io 171pf+0w 24: 4.120u 0.140s 0:04.35 97.9% 0+0k 0+0io 171pf+0w 25: 4.110u 0.150s 0:04.31 98.8% 0+0k 0+0io 171pf+0w 26: 4.100u 0.140s 0:04.30 98.6% 0+0k 0+0io 171pf+0w 27: 4.120u 0.130s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 28: 4.170u 0.120s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 29: 4.080u 0.210s 0:04.33 99.0% 0+0k 0+0io 171pf+0w 30: 4.120u 0.140s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 31: 4.120u 0.160s 0:04.32 99.0% 0+0k 0+0io 
171pf+0w 32: 4.120u 0.150s 0:04.33 98.6% 0+0k 0+0io 171pf+0w 33: 4.140u 0.110s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 34: 4.120u 0.130s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 35: 4.130u 0.140s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 36: 4.090u 0.160s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 37: 4.100u 0.160s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 38: 4.180u 0.150s 0:04.32 100.2% 0+0k 0+0io 171pf+0w 39: 4.100u 0.170s 0:04.32 98.8% 0+0k 0+0io 172pf+0w 40: 4.000u 0.290s 0:04.33 99.0% 0+0k 0+0io 171pf+0w 41: 4.140u 0.170s 0:04.35 99.0% 0+0k 0+0io 171pf+0w 42: 4.060u 0.230s 0:04.35 98.6% 0+0k 0+0io 171pf+0w 43: 4.110u 0.180s 0:04.32 99.3% 0+0k 0+0io 171pf+0w 44: 4.040u 0.220s 0:04.32 98.6% 0+0k 0+0io 172pf+0w 45: 4.130u 0.140s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 46: 4.100u 0.200s 0:04.33 99.3% 0+0k 0+0io 171pf+0w 47: 4.150u 0.110s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 48: 4.150u 0.120s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 49: 4.140u 0.140s 0:04.32 99.0% 0+0k 0+0io 171pf+0w 50: 4.040u 0.210s 0:04.32 98.3% 0+0k 0+0io 171pf+0w 51: 4.090u 0.190s 0:04.32 99.0% 0+0k 0+0io 171pf+0w 52: 4.090u 0.180s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 53: 4.120u 0.160s 0:04.32 99.0% 0+0k 0+0io 171pf+0w 54: 4.150u 0.130s 0:04.32 99.0% 0+0k 0+0io 171pf+0w 55: 4.050u 0.220s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 56: 4.030u 0.230s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 57: 4.060u 0.200s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 58: 4.150u 0.150s 0:04.32 99.5% 0+0k 0+0io 171pf+0w 59: 4.050u 0.220s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 60: 4.080u 0.180s 0:04.32 98.6% 0+0k 0+0io 171pf+0w 61: 4.090u 0.180s 0:04.30 99.3% 0+0k 0+0io 171pf+0w 62: 4.080u 0.190s 0:04.32 98.8% 0+0k 0+0io 171pf+0w 63: 4.120u 0.160s 0:04.33 98.8% 0+0k 0+0io 171pf+0w 64: 4.150u 0.120s 0:04.32 98.8% 0+0k 0+0io 171pf+0w vnNCommand 'grep -i processor /proc/cpuinfo' >>> Executing as root@142.103.237.1 processor : 0 processor : 1 >>> Executing as root@142.103.237.2 processor : 0 processor : 1 >>> Executing as root@142.103.237.3 processor : 0 processor : 1 >>> Executing as root@142.103.237.4 
processor : 0 processor : 1 >>> Executing as root@142.103.237.5 processor : 0 processor : 1 >>> Executing as root@142.103.237.6 processor : 0 processor : 1 >>> Executing as root@142.103.237.7 processor : 0 processor : 1 >>> Executing as root@142.103.237.8 processor : 0 processor : 1 >>> Executing as root@142.103.237.9 processor : 0 processor : 1 >>> Executing as root@142.103.237.10 processor : 0 processor : 1 >>> Executing as root@142.103.237.11 processor : 0 processor : 1 >>> Executing as root@142.103.237.12 processor : 0 processor : 1 >>> Executing as root@142.103.237.13 processor : 0 processor : 1 >>> Executing as root@142.103.237.14 processor : 0 processor : 1 >>> Executing as root@142.103.237.15 processor : 0 processor : 1 >>> Executing as root@142.103.237.16 processor : 0 processor : 1 >>> Executing as root@142.103.237.17 processor : 0 processor : 1 >>> Executing as root@142.103.237.18 processor : 0 processor : 1 >>> Executing as root@142.103.237.19 processor : 0 processor : 1 >>> Executing as root@142.103.237.20 processor : 0 processor : 1 >>> Executing as root@142.103.237.21 processor : 0 processor : 1 >>> Executing as root@142.103.237.22 processor : 0 processor : 1 >>> Executing as root@142.103.237.23 processor : 0 processor : 1 >>> Executing as root@142.103.237.24 processor : 0 processor : 1 >>> Executing as root@142.103.237.25 processor : 0 processor : 1 >>> Executing as root@142.103.237.26 processor : 0 processor : 1 >>> Executing as root@142.103.237.27 processor : 0 processor : 1 >>> Executing as root@142.103.237.28 processor : 0 processor : 1 >>> Executing as root@142.103.237.29 processor : 0 processor : 1 >>> Executing as root@142.103.237.30 processor : 0 processor : 1 >>> Executing as root@142.103.237.31 processor : 0 processor : 1 >>> Executing as root@142.103.237.32 processor : 0 processor : 1 >>> Executing as root@142.103.237.33 processor : 0 processor : 1 >>> Executing as root@142.103.237.34 processor : 0 processor : 1 >>> Executing as 
root@142.103.237.35 processor : 0 processor : 1 >>> Executing as root@142.103.237.36 processor : 0 processor : 1 >>> Executing as root@142.103.237.37 processor : 0 processor : 1 >>> Executing as root@142.103.237.38 processor : 0 processor : 1 >>> Executing as root@142.103.237.39 processor : 0 processor : 1 >>> Executing as root@142.103.237.40 processor : 0 processor : 1 >>> Executing as root@142.103.237.41 processor : 0 processor : 1 >>> Executing as root@142.103.237.42 processor : 0 processor : 1 >>> Executing as root@142.103.237.43 processor : 0 processor : 1 >>> Executing as root@142.103.237.44 processor : 0 processor : 1 >>> Executing as root@142.103.237.45 processor : 0 processor : 1 >>> Executing as root@142.103.237.46 processor : 0 processor : 1 >>> Executing as root@142.103.237.47 processor : 0 processor : 1 >>> Executing as root@142.103.237.48 processor : 0 processor : 1 >>> Executing as root@142.103.237.49 processor : 0 processor : 1 >>> Executing as root@142.103.237.50 processor : 0 processor : 1 >>> Executing as root@142.103.237.51 processor : 0 processor : 1 >>> Executing as root@142.103.237.52 processor : 0 processor : 1 >>> Executing as root@142.103.237.53 processor : 0 processor : 1 >>> Executing as root@142.103.237.54 processor : 0 processor : 1 >>> Executing as root@142.103.237.55 processor : 0 processor : 1 >>> Executing as root@142.103.237.56 processor : 0 processor : 1 >>> Executing as root@142.103.237.57 processor : 0 processor : 1 >>> Executing as root@142.103.237.58 processor : 0 processor : 1 >>> Executing as root@142.103.237.59 processor : 0 processor : 1 >>> Executing as root@142.103.237.60 processor : 0 processor : 1 >>> Executing as root@142.103.237.61 processor : 0 processor : 1 >>> Executing as root@142.103.237.62 processor : 0 processor : 1 >>> Executing as root@142.103.237.63 processor : 0 processor : 1 >>> Executing as root@142.103.237.64 processor : 0 processor : 1 ############################################################ Sat 
Feb 24 08:46:12 PST 2001 ############################################################ vn5, vn10 down vn10 down 8:13 vn5 down 7:58 ############################################################ Sat Feb 24 15:56:23 PST 2001 ############################################################ vn10 down again, putting in call to Bill (note, this is one of the machines which had power supply changed at end of upgrade) Sat Feb 24 16:38:40 PST 2001 Power supply replaced ############################################################ Sun Feb 25 07:13:06 PST 2001 ############################################################ vn5's load average through the roof again (as per CRASH_87 Sun Dec 17 10:53:46 PST 2000) ... bad disk? Kernel panic, but may be problem with disk? ############################################################ Tue Feb 27 18:46:31 PST 2001 ############################################################ vn5 is down again, need to pull it and get it fixed up in Hennings ############################################################ Tue Mar 6 16:39:15 PST 2001 ############################################################ (1) New accounts for Phil Austin Ming Zhao nu cat<paustin paustin:x:9046:9000:Phil Austin:/d/vnfe1/home/paustin:/bin/tcsh END cat<zming zming:x:9047:9000:Ming Zhao:/d/vnfe1/home/zming:/bin/tcsh END vnNewUsers paustin vnNewUsers zming cd Blurbs cp dsteck paustin cp dsteck zming vi paustin zming TODO etc; sola # $1$x0kRo182$QXmSMDWBo8RnR97jN5JIP0 vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~paustin; echo paustin@eos.ubc.ca > .forward; chown paustin.other .forward; ls -al; cat .forward' ssh root@vnfe1 'cd ~zming; echo zming@geogubc.ca > .forward; chown zming.other .forward; ls -al; cat .forward' ############################################################ Thu Mar 8 16:48:15 PST 2001 ############################################################ (1) New account for Kamson Lai (Undergrad student working with Douglas Scott) nu cat<ylai ylai:x:9048:9000:Kamson 
Lai:/d/vnfe1/home/ylai:/bin/tcsh END vnNewUsers ylai cd Blurbs cp dsteck ylai vi ylai TODO etc; sola vs # ILMRh2ebWy102 vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~ylai; echo ylai@physics.ubc.ca > .forward; chown ylai.other .forward; ls -al; cat .forward' ############################################################ Sat Mar 24 11:03:42 PST 2001 ############################################################ vn32 down 12:44 vnfe3 down 13+21:38 see README.CRASH CRASH_89 ############################################################ Sat Mar 24 14:22:04 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-03-24-1422.tar.gz # laplace:/usr2/people/matt/system/vnArchive # 3878 3878 73682 cd vnArchive RM -r Rtop.2001-03-24-1422 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-03-24-1422.tar.gz cd Rtop.2001-03-24-1422 mv * .. cd .. rmdir Rtop.2001-03-24-1422 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:02' | pre CP | post '../Rtop.2001.02' | csh mkdir ../Rtop.2001.03 LS | grep '^2001:03' | pre CP | post '../Rtop.2001.03' | csh cd /home/matt/system/vnshadow/Rtop.2001.02 vnAccount mv `LS | grep -v '^200'` ../vnAccount.01.02 cd /home/matt/system/vnshadow/Rtop.2001.03 mkdir ../vnAccount.01.03 vnAccount mv `LS | grep -v '^200'` ../vnAccount.01.03 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.02 vnAccount.01.02.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.02 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.03 . 
############################################################ Fri Apr 6 13:20:42 PDT 2001 ############################################################ (1) vn25 down See README.CRASH CRASH_90 (2) rpc.mountd needed restarting on vnfe[123] # As root@vnfe[123] killall rpc.mountd /usr/sbin/rpc.mountd (3) PGI license daemon needed re-starting vnfeCommand 'killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit vnfeCommand 'cd TestPGI; make clean; make' # Could make this easier by installing appropriate keys in ~adm/.ssh ############################################################ Sat Apr 7 16:36:27 PDT 2001 ############################################################ (1) Updated 'ntpd' after patching etc. See (laplace:~/matt/system/NTP/README) (2) Problem with vn25, ssh generates segmentation fault, time is off, rebooted Didn't come back ############################################################ Sun Apr 8 11:49:08 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-04-08-1149.tar.gz # 2286 2286 43434 # laplace:/usr2/people/matt/system/vnArchive # 2286 2286 43434 cd vnArchive RM -r Rtop.2001-04-08-1149 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-04-08-1149.tar.gz cd Rtop.2001-04-08-1149 mv * .. cd .. 
rmdir Rtop.2001-04-08-1149 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:03' | pre CP | post '../Rtop.2001.03' | csh mkdir ../Rtop.2001.04 LS | grep '^2001:04' | pre CP | post '../Rtop.2001.04' | csh cd /home/matt/system/vnshadow/Rtop.2001.03 mv ../vnAccount.01.03 ../vnAccount.01.03.O mkdir ../vnAccount.01.03 vnAccount mv `LS | grep -v '^200'` ../vnAccount.01.03 cd /home/matt/system/vnshadow/Rtop.2001.04 mkdir ../vnAccount.01.04 vnAccount mv `LS | grep -v '^200'` ../vnAccount.01.04 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.03 vnAccount.01.03.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.03 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.04 . ############################################################ Fri Apr 20 12:30:14 PDT 2001 ############################################################ (1) vn20, vn25 back after memory replacements, need secondary installations, kernel patching, /usr/local etc. Following README.NEW ############################################################ ### Secondary set-up of new nodes ### ### vn20, vn25 ############################################################ ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 # As root@vn{20,25} cd /tmp ftp vnfe1 < 59->62 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (3) Matt setup Obsolete??
###--------------------------------------------------------- vnCommand /d/vnfe1/home/matt/scripts/vnSetupMatt vnCommand 'cat ~matt/.ssh/identity.pub' > /tmp/IDENTITY # Missing vn45 --- vn47 due to NFS mounting "updated" password files # Update master_authorized_keys ###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd # Modify ~matt/scripts/vnN to include new nodes etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow vnDistEtc resolv.conf vnDistEtc hosts.allow vnDistEtc hosts.deny ###--------------------------------------------------------- ### (4.5) Rather than manually installing everything, will ### tar up /usr/local from vn5, then install ###--------------------------------------------------------- # As root@vn5 cd /usr tar cf local.tar local # As root@vn{20,25} cd /usr mv local local.O ftp vn5 matt cd /usr get local.tar quit tar xf local.tar vnMakeMPIMachines 1 64 vnMakeMPIPGIMachines 1 64 # Testing MPI on vn20 vn25 vnMpptest vn20 vn25 vnTop vn20 vn25 ssh vn20 'killall mpptest' ssh vn25 'killall mpptest' # OK ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnii ###--------------------------------------------------------- ### (10) NTP ###--------------------------------------------------------- etc scp ntp.conf root@vn20:/etc scp ntp.conf root@vn25:/etc scp ~matt/system/vn/image/master/etc/rc.local root@vn20:/etc/rc.d/rc.local scp ~matt/system/vn/image/master/etc/rc.local root@vn25:/etc/rc.d/rc.local ssh root@vn20 /usr/local/bin/ntpd ssh root@vn25 /usr/local/bin/ntpd ntptimeset ssh root@vn20 vnSetdate ssh root@vn20 ntptimeset ssh root@vn25 vnSetdate ssh root@vn25 ntptimeset ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn20 'vnnewK' ssh
root@vn25 'vnnewK' # Modified lilo.conf.nodes.2.2.14-Psmp # Removed image=/boot/vmlinuz-2.2.13-7Pmdksmp label=linux root=/dev/hda1 read-only # from script and lilo.conf on new nodes /sbin/lilo # reboot of vn20 didn't work :-( Disks are mounted as /dev/hda2 !!! # Will probably need a re-install/update on vn20 TODO hwclock --systohc hwclock --show ############################################################ Sat Apr 21 08:11:39 PDT 2001 ############################################################ (1) vn20 back up after re-partitioning, reinstall, disk mounted as /dev/hda1 installations, kernel patching, /usr/local etc. ############################################################ ### Secondary set-up of vn20 ############################################################ ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Remove vn20, 142.103.237.20 from known hosts matt@vnfe1 # Modified ~/.ssh/known_hosts cd /home/matt/system/vn/image/master/ssh ./Dist matt@laplace # Modified ~/.ssh/known_hosts matt@bh1 # Modified ~/.ssh/known_hosts root@bh1 # Modified ~/.ssh/known_hosts # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 # As root@vn{20,25} cd /tmp ftp vnfe1 < 20 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd # Modify ~matt/scripts/vnN to include new nodes etc vnDistEtc group passwd shadow resolv.conf hosts.allow hosts.deny ssh root@vn20 'killall -HUP inetd' ###--------------------------------------------------------- ### (4.5) Rather than manually installing everything, will ### tar up /usr/local from vn5, then install ###--------------------------------------------------------- PREVIOUSLY # As root@vn5 cd /usr tar cf local.tar local # As root@vn20 cd /usr mv local local.O ftp vn5 matt 
cd /usr get local.tar quit tar xf local.tar vnMakeMPIMachines 1 64 vnMakeMPIPGIMachines 1 64 # Testing MPI on vn20 vn25 vnMpptest vn20 vn25 vnTop vn20 vn25 ssh vn20 'killall mpptest' ssh vn25 'killall mpptest' # OK ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnii ###--------------------------------------------------------- ### (10) NTP ###--------------------------------------------------------- etc scp ntp.conf root@vn20:/etc scp ~matt/system/vn/image/master/etc/rc.local root@vn20:/etc/rc.d/rc.local ssh root@vn20 vnSetdate ssh root@vn20 hwclock --systohc ssh root@vn20 hwclock --show ssh root@vn20 /usr/local/bin/ntpd ssh root@vn20 ntptimeset ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn20 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn20 'vnnewK' ssh root@vn20 'cat /etc/lilo.conf' ssh root@vn20 /sbin/lilo reboot # OK ############################################################ Sat Apr 21 08:51:03 PDT 2001 ############################################################ (1) ntpd not running on many nodes (too much skew?), didn't come up on reboot on # Had copied rc.local to /etc, rather than to /etc/rc.d vnNCommand 'ls /etc/rc.local' ssh root@vn20 '/bin/rm /etc/rc.local' ssh root@vn25 '/bin/rm /etc/rc.local' scp ~matt/system/vn/image/master/etc/rc.local root@vn20:/etc/rc.d/rc.local scp ~matt/system/vn/image/master/etc/rc.local root@vn25:/etc/rc.d/rc.local vnallCommand 'jj ntpd | grep -v jj' ############################################################ Sat Apr 21 09:19:36 PDT 2001 ############################################################ ... and after all that, vn20 just crashed again! 
############################################################ Mon Apr 23 14:21:44 PDT 2001 ############################################################ vn5's load average through roof, unresponsive. suggest disk replacement ############################################################ Mon Apr 23 14:22:52 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-04-23-1423.tar.gz # 2599 2599 49381 # laplace:/usr2/people/matt/system/vnArchive # 2599 2599 49381 cd vnArchive RM -r Rtop.2001-04-23-1423 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-04-23-1423.tar.gz cd Rtop.2001-04-23-1423 mv * .. cd .. rmdir Rtop.2001-04-23-1423 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:04' | pre CP | post '../Rtop.2001.04' | csh cd /home/matt/system/vnshadow/Rtop.2001.04 mv ../vnAccount.01.04 ../vnAccount.01.04.O vnAccount mkdir ../vnAccount.01.04 mv `LS | grep -v '^200'` ../vnAccount.01.04 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.04 vnAccount.01.04.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.04 . 
############################################################ Thu Apr 26 19:03:28 PDT 2001 ############################################################ vn43 down, see CRASH_94 ############################################################ Fri Apr 27 17:40:20 PDT 2001 ############################################################ (1) vn20 back up after disk replacement, re-partitioning, reinstall, disk mounted as /dev/hda1 ############################################################ ### Secondary set-up of vn20 ############################################################ vnN does not include 5, 20 ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Modified matt@laplace:~/.ssh to distribute known_hosts as # well as authorized_keys # Remove vn20, 142.103.237.20 from known hosts # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 etc sola vs vnDistEtc shadow # As root@vn{20} cd /tmp ftp vnfe1 < 20 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd etc sola vs :n shadow.O # Modify ~matt/scripts/vnN to include new nodes viw vnN etc vnDistEtc group passwd shadow resolv.conf hosts.allow hosts.deny vnDistDot .aliases .cshrc .exrc .rhosts ssh root@vn20 'killall -HUP inetd' ###--------------------------------------------------------- ### (4.5) Rather than manually installing everything, will ### tar up /usr/local from vn5, then install ###--------------------------------------------------------- # As root@vn1 cd /usr tar cf local.tar local # As root@vn20 cd /usr mv local local.O ftp vn1 matt cd /usr get local.tar quit tar xf local.tar vnMakeMPIMachines 1 64 vnMakeMPIPGIMachines 1 64 # Testing MPI on vn20 vn25 vnMpptest vn20 vn25 vnTop vn20 vn25 ssh vn20 'killall mpptest' ssh vn25 'killall mpptest' # OK 
###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnii ###--------------------------------------------------------- ### (10) NTP ###--------------------------------------------------------- etc scp ntp.conf root@vn20:/etc scp ~matt/system/vn/image/master/etc/rc.local root@vn20:/etc/rc.d/rc.local ssh root@vn20 vnSetdate ssh root@vn20 hwclock --systohc ssh root@vn20 hwclock --show ssh root@vn20 /usr/local/bin/ntpd ssh root@vn20 ntptimeset ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn20 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn20 'vnnewK' ssh root@vn20 'cat /etc/lilo.conf' ssh root@vn20 /sbin/lilo reboot # OK Fri Apr 27 18:07:11 PDT 2001 ############################################################ Mon Apr 30 09:32:59 PDT 2001 ############################################################ vn13 vn26 vn51 down see README.CRASH: CRASH_96, CRASH_97, CRASH_98 ############################################################ Tue May 1 19:02:14 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-05-01-1902.tar.gz # 1302 1302 24738 # laplace:/usr2/people/matt/system/vnArchive # 1302 1302 24738 cd vnArchive RM -r Rtop.2001-05-01-1902 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-05-01-1902.tar.gz cd Rtop.2001-05-01-1902 mv * .. cd .. 
rmdir Rtop.2001-05-01-1902 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:04' | pre CP | post '../Rtop.2001.04' | csh mkdir ../Rtop.2001.05 LS | grep '^2001:05' | pre CP | post '../Rtop.2001.05' | csh cd /home/matt/system/vnshadow/Rtop.2001.04 RM -r ../vnAccount.01.04.O mv ../vnAccount.01.04 ../vnAccount.01.04.O vnAccount mkdir ../vnAccount.01.04 mv `LS | grep -v '^200'` ../vnAccount.01.04 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.04.O mv vnAccount.01.04 vnAccount.01.04.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.04 . ############################################################ Thu May 3 17:32:42 PDT 2001 ############################################################ ############################################################ ### Secondary set-up of vn5 ############################################################ vnN does not include 5 ###--------------------------------------------------------- ### (1) SSH ###--------------------------------------------------------- # Modified matt@laplace:~/.ssh to distribute known_hosts as # well as authorized_keys # Remove vn5, 142.103.237.5 from known hosts # Accumulate keys in # matt@laplace.physics.ubc.ca:~/.ssh/master_authorized_keys # Remove matt's passwd on vnfe1 etc sola vs vnDistEtc shadow # As root@vn{5} cd /tmp ftp vnfe1 < 20 cds vnRemote vnNFSsetup ###--------------------------------------------------------- ### (4) /etc setup ###--------------------------------------------------------- # Restore Matt's passwd etc sola vs :n shadow.O # Modify ~matt/scripts/vnN to include new nodes viw vnN etc vnDistEtc group vnDistEtc passwd vnDistEtc shadow vnDistEtc resolv.conf vnDistEtc hosts.allow vnDistEtc hosts.deny vnDistDot .aliases vnDistDot .cshrc vnDistDot .exrc vnDistDot .rhosts ssh root@vn5 'killall -HUP inetd' ###--------------------------------------------------------- ### (4.5) Rather than manually installing everything, will ### tar up 
/usr/local from vn5, then install ###--------------------------------------------------------- # As root@vn1 # cd /usr # tar cf local.tar local # As root@vn5 cd /usr mv local local.O ftp vn1 matt bin cd /usr get local.tar quit tar xf local.tar vnMakeMPIMachines 1 64 vnMakeMPIPGIMachines 1 64 # Testing MPI on vn3 vn5 TODO vnMpptest vn3 vn5 vnTop vn3 vn5 ssh vn3 'killall mpptest' ssh vn5 'killall mpptest' # OK ###--------------------------------------------------------- ### (6) Distribute Install/Installz ###--------------------------------------------------------- vnii ###--------------------------------------------------------- ### (10) NTP ###--------------------------------------------------------- etc scp ntp.conf root@vn5:/etc scp ~matt/system/vn/image/master/etc/rc.local root@vn5:/etc/rc.d/rc.local ssh root@vn5 vnSetdate ssh root@vn5 hwclock --systohc ssh root@vn5 hwclock --show ssh root@vn5 /usr/local/bin/ntpd ssh root@vn5 ntptimeset ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn5 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn5 'vnnewK' ssh root@vn5 'cat /etc/lilo.conf' ssh root@vn5 /sbin/lilo reboot # OK ############################################################ Thu May 3 18:31:13 PDT 2001 ############################################################ (1) New account for Gustavo Narvaez (SFU Postdoc with Prof. F. Kirczenow) nu cat<gustavo gustavo:x:9049:9000:Gustavo Arnaldo Narvaez:/d/vnfe1/home/gustavo:/bin/tcsh END vnNewUsers gustavo cd Blurbs cp dsteck gustavo vi gustavo etc; sola vs # $1$0.XGoGbU$mbHBlW.teTxfIW43T1Lq8. 
vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~gustavo; echo gustavo@sfu.ca > .forward; chown gustavo.other .forward; ls -al; cat .forward' ############################################################ Mon May 7 16:40:47 PDT 2001 ############################################################ (1) New account for Hugo Villegas nu cat<villegas villegas:x:625:600:Hugo Villegas:/d/vnfe1/home/villegas:/bin/tcsh END vnNewUsers villegas cd Blurbs cp dsteck villegas vi villegas etc; sola vs # Gu2y06ZYC2xHY vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~villegas; echo villegas@physics.ubc.ca > .forward; chown villegas.choptuik .forward; ls -al; cat .forward' ############################################################ Mon May 7 17:52:49 PDT 2001 ############################################################ (1) New account for CVS account repository nu cat<cvs cvs:x:626:600:CVS repository account:/d/vnfe1/home2/cvs:/bin/tcsh END vnNewUsers cvs etc; sola vs # $1$cBNJk036$pTfTgL1rdg6E2r4j6E5qb0 vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~cvs; echo choptuik@physics.ubc.ca > .forward; chown cvs.choptuik .forward; ls -al; cat .forward' ############################################################ Sun May 13 12:43:19 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-05-13-1243.tar.gz # 2101 2101 39919 # laplace:/usr2/people/matt/system/vnArchive # 2101 2101 39919 cd vnArchive RM -r Rtop.2001-05-13-1243 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-05-13-1243.tar.gz cd Rtop.2001-05-13-1243 mv * .. cd .. 
rmdir Rtop.2001-05-13-1243 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:05' | pre CP | post '../Rtop.2001.05' | csh cd /home/matt/system/vnshadow/Rtop.2001.05 vnAccount mkdir ../vnAccount.01.05 mv `LS | grep -v '^200'` ../vnAccount.01.05 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.05 . ############################################################ Mon May 14 15:59:43 PDT 2001 ############################################################ (1) vn64 down 0:47 Apparently "false alarm"---in machine room, machine running and "alive" again? ############################################################ Sat May 26 10:42:34 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-05-26-1044.tar.gz # 2354 2354 44726 # laplace:/usr2/people/matt/system/vnArchive # 2354 2354 44726 cd vnArchive RM -r Rtop.2001-05-26-1044 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-05-26-1044.tar.gz cd Rtop.2001-05-26-1044 mv * .. cd .. rmdir Rtop.2001-05-26-1044 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:05' | pre CP | post '../Rtop.2001.05' | csh cd /home/matt/system/vnshadow/Rtop.2001.05 vnAccount /bin/rm -rf ../vnAccount.01.05 mkdir ../vnAccount.01.05 mv `LS | grep -v '^200'` ../vnAccount.01.05 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.05 vnAccount.01.05.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.05 . 
############################################################ Tue May 29 10:48:30 PDT 2001 ############################################################ (1) Scott N observes that more recent versions of 'ps' *can* sort processes by CPU usage so i was messin' around with 'ps' and noticed that it works differently on bh6 (for instance) than vnfe1 and vn20 (for example). I then checked, and the cluster's version of 'ps' is different the cluster's: procps version 2.0.2 bhN's: procps version 2.0.6 What i was trying specifically was: ssh bh6 '(ps -eo "%C %U %c" --sort -pcpu)' | less this seems to sort all the jobs well based on CPU time. it also seems to work correctly on the bh machines but not the cluster. Something like this command with a perl script that selects those jobs with %CPU > 30 would probably do the trick regarding replacing RTOP. That is, once 'ps' is upgraded on the cluster. For example, I've made a script bh6:~scn/bin/psit that -- when given a host name -- opens a "less" session of the host's currents processes, sorted by %CPU. As I said before, it doesn't work correctly with the cluster machines, since their version of ps is bad. But, try it on the bh machines. Let me know if this helps or if you want me to upgrade ps on the cluster, or if you want me to implement this into RTOP. # ftp://ftp.vlug.org/dists/mandrake/8.0/i586/Mandrake/RPMS/ # Will see whether we can install procps-2.0.7-9mdk.i586.rpm # on cluster. As matt@vnfe1 cd RMPS scp matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/procps-2.0.7-9mdk.i586.rpm . 
# As root@vn64 cd ~matt/RPM/; rpm -Fvh procps*rpm; rpm --query procps only packages with major numbers <= 3 are supported by this version of RPM query of procps-2.0.7-9mdk.i586.rpm failed # vnallbgCommand 'cd ~matt/RPM/; rpm -Fvh bin*rpm; rpm --query bind' # vnallCommand 'rpm --query bind' ############################################################ Tue May 29 18:47:49 PDT 2001 ############################################################ (1) vnswitch reporting "Excessive CRC/alignment errors on port I8" (vn64) From May 25, 3:40 AM thru May 26, 11:00 AM # As root@vnfe1 vn64 up 63+07:57, 2 users, load 1.71, 1.71, 1.71 vn61 up 95+02:58, 0 users, load 1.98, 1.70, 1.52 ping -f -s 4096 vn64 # For ~ 1 minute 214979 packets transmitted, 214195 packets received, 0% packet loss Dropped 784 ping -f -s 4096 vn61 198036 packets transmitted, 197995 packets received, 0% packet loss # For ~ 1 minute Dropped 41 # Coded ~matt/scripts/vnPingTest # As root@vnfe1 foreach i (`iota 64`) vnPingTest vn${i} 10000 | tee -a vnPingTest.short.out end #----------------------------------------------------------------------- vn1 10017 10000 17 vn2 10021 10000 21 vn3 10017 10000 17 vn4 10017 10000 17 vn5 10017 10000 17 vn6 10017 10000 17 vn7 10017 10000 17 vn8 10029 10000 29 vn9 10030 10000 30 vn10 10114 10000 114 vn11 10021 10000 21 vn12 10017 10000 17 vn13 10041 10000 41 vn14 10017 10000 17 vn15 10128 10000 128 vn16 10139 10000 139 vn17 10019 10000 19 vn18 10017 10000 17 vn19 10017 10000 17 vn20 10017 10000 17 vn21 10052 10000 52 vn22 10017 10000 17 vn23 10021 10000 21 vn24 10017 10000 17 vn25 10022 10000 22 vn26 10018 10000 18 vn27 10035 10000 35 vn28 10022 10000 22 vn29 10021 10000 21 vn30 10080 10000 80 vn31 10017 10000 17 vn32 10038 10000 38 vn33 10087 10000 87 vn34 10056 10000 56 vn35 10017 10000 17 vn36 10040 10000 40 vn37 10017 10000 17 vn38 10036 10000 36 vn39 10073 10000 73 vn40 10046 10000 46 vn41 10022 10000 22 vn42 10022 10000 22 vn43 10033 10000 33 vn44 10086 10000 86 vn45 10068 
10000 68 vn46 10021 10000 21 vn47 10019 10000 19 vn48 10025 10000 25 vn49 10051 10000 51 vn50 10091 10000 91 vn51 10019 10000 19 vn52 10030 10000 30 vn53 10019 10000 19 vn54 10021 10000 21 vn55 10017 10000 17 vn56 10021 10000 21 vn57 10022 10000 22 vn58 10073 10000 73 vn59 10024 10000 24 vn60 10093 10000 93 vn61 10018 10000 18 vn62 10019 10000 19 vn63 10028 10000 28 vn64 10074 10000 74 #----------------------------------------------------------------------- foreach i (`iota 64`) vnPingTest vn${i} 100000 | tee -a vnPingTest.long.out end #----------------------------------------------------------------------- vn1 100048 100000 48 vn2 100075 100000 75 vn3 100092 100000 92 vn4 100317 100000 317 vn5 100029 100000 29 vn6 100033 100000 33 vn7 100071 100000 71 vn8 100017 100000 17 vn9 100035 100000 35 vn10 100035 100000 35 vn11 100059 100000 59 vn12 100061 100000 61 vn13 100039 100000 39 vn14 100102 100000 102 vn15 100150 100000 150 vn16 100023 100000 23 vn17 100024 100000 24 vn18 100022 100000 22 vn19 100047 100000 47 vn20 100696 100000 696 vn21 100622 100000 622 vn22 100034 100000 34 vn23 100195 100000 195 vn24 100023 100000 23 vn25 100054 100000 54 vn26 100036 100000 36 vn27 100062 100000 62 vn28 100148 100000 148 vn29 100037 100000 37 vn30 100393 100000 393 vn31 100048 100000 48 vn32 100394 100000 394 vn33 100616 100000 616 vn34 100465 100000 465 vn35 100018 100000 18 vn36 100535 100000 535 vn37 100646 100000 646 vn38 100704 100000 704 vn39 100785 100000 785 vn40 100735 100000 735 vn41 100749 100000 749 vn42 100152 100000 152 vn43 100025 100000 25 vn44 100537 100000 537 vn45 100584 100000 584 vn46 100067 100000 67 vn47 100022 100000 22 vn48 100165 100000 165 vn49 100048 100000 48 vn50 100661 100000 661 vn51 100069 100000 69 vn52 100036 100000 36 vn53 100063 100000 63 vn54 100169 100000 169 vn55 100035 100000 35 vn56 100051 100000 51 vn57 100070 100000 70 vn58 100749 100000 749 vn59 100036 100000 36 vn60 100552 100000 552 vn61 100091 100000 91 vn62 100035 100000 35 vn63 
100165 100000 165 vn64 100507 100000 507 #----------------------------------------------------------------------- ############################################################ Fri Jun 1 12:47:13 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-06-01-1249.tar.gz # 1100 1100 20900 # laplace:/usr2/people/matt/system/vnArchive # 1100 1100 20900 cd vnArchive RM -r Rtop.2001-06-01-1249 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-06-01-1249.tar.gz cd Rtop.2001-06-01-1249 mv * .. cd .. rmdir Rtop.2001-06-01-1249 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:05' | pre CP | post '../Rtop.2001.05' | csh cd /home/matt/system/vnshadow/Rtop.2001.05 vnAccount -p 25 /bin/rm -rf ../vnAccount.01.05 mkdir ../vnAccount.01.05 mv `LS | grep -v '^200'` ../vnAccount.01.05 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.05.O mv vnAccount.01.05 vnAccount.01.05.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.05 . ############################################################ Mon Jun 4 10:35:54 PDT 2001 ############################################################ (1) Reinstalling PGI version of RNPL on front-ends. 
vnfeallbgCommand 'cdi; Installz.PG rnpl' (2) Reinstalling RNPL on all nodes vnallbgCommand 'cdi; Installz rnpl' ############################################################ Thu Jun 7 22:22:40 PDT 2001 ############################################################ (1) New Account for Alexandre Gorelov nu cat<trinat trinat:x:9050:9000:Alexandre Gorelov:/d/vnfe2/home2/trinat:/bin/tcsh END vnNewUsers trinat etc; sola vs # $1$s7gcUWBn$U79w0uiCxisYvlrUHaudG/ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~trinat; echo iaeg@alph04.triumf.ca > .forward; chown trinat.other .forward; ls -al; cat .forward' ############################################################ Fri Jun 8 12:59:41 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-06-08-1259.tar.gz # 1195 1195 22705 # laplace:/usr2/people/matt/system/vnArchive # 1195 1195 22705 cd vnArchive RM -r Rtop.2001-06-08-1259 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-06-08-1259.tar.gz cd Rtop.2001-06-08-1259 mv * .. cd .. rmdir Rtop.2001-06-08-1259 cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.06 LS | grep '^2001:06' | pre CP | post '../Rtop.2001.06' | csh cd /home/matt/system/vnshadow/Rtop.2001.06 vnAccount -p 25 mkdir ../vnAccount.01.06 mv `LS | grep -v '^200'` ../vnAccount.01.06 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.06 . ############################################################ Sat Jun 9 12:08:39 PDT 2001 ############################################################ (1) vn64 down, needs NIC replaced. 
############################################################ Mon Jun 11 14:27:48 PDT 2001 ############################################################ (1) In machine room to replace vn64's NIC # Swapped out card # As root@vn6 vnSetdate hwclock --systohc hwclock --show /usr/local/bin/ntpd ntptimeset ############################################################ Thu Jun 7 22:22:40 PDT 2001 ############################################################ (1) New Account for David Axen nu cat<axen axen:x:9051:9000:David Axen:/d/vnfe1/home/axen:/bin/tcsh END vnNewUsers axen etc; sola vs # 2Y2dQ0mySXimA vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~axen; echo axen@physics.ubc.ca > .forward; chown axen.other .forward; ls -al; cat .forward' ############################################################ Mon Jun 18 12:50:34 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-06-18-1250.tar.gz # 1735 1735 32965 # laplace:/usr2/people/matt/system/vnArchive # 1735 1735 32965 cd vnArchive RM -r Rtop.2001-06-18-1250 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-06-18-1250.tar.gz cd Rtop.2001-06-18-1250 mv * .. cd .. rmdir Rtop.2001-06-18-1250 cd /home/matt/system/vnshadow/Rtop.2001 mv ../Rtop.2001.06 ../Rtop.2001.06.O mkdir ../Rtop.2001.06 LS | grep '^2001:06' | pre CP | post '../Rtop.2001.06' | csh cd /home/matt/system/vnshadow/Rtop.2001.06 vnAccount -p 25 mv ../vnAccount.01.06 ../vnAccount.01.06.O mkdir ../vnAccount.01.06 mv `LS | grep -v '^200'` ../vnAccount.01.06 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.06 vnAccount.01.06.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.06 . 
############################################################ Wed Jun 20 11:56:40 PDT 2001 ############################################################ (1) vnfe3 will probably need a reboot see README.CRASH CRASH_99 vnSetDate hwclock --systohc hwclock --show ntptimeset Updated Web page, motd ############################################################ Thu Jun 21 08:47:18 PDT 2001 ############################################################ (1) 1: vn52 down 10:47 see README.CRASH CRASH_100 vnSetdate hwclock --systohc hwclock --show ntptimeset Apparently only zming and suqin were running zming@geog.ubc.ca suqin.mech.ubc.ca ############################################################ Fri Jun 22 11:08:46 PDT 2001 ############################################################ (1) New Account for Konstantin Pougatch nu cat<pougatch pougatch:x:1827:1800:Konstantin Pougatch:/d/vnfe3/home/pougatch:/bin/tcsh END vnNewUsers pougatch etc; sola vs # ftdNr.8KpwzAw vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~pougatch; echo pougatch@mech.ubc.ca > .forward; chown pougatch.salcudean .forward; ls -al; cat .forward' ############################################################ Fri Jun 22 12:33:09 PDT 2001 ############################################################ (1) New Account for Geoffrey D. Jacobs nu cat<jacobs jacobs:x:9052:9000:Geoffrey D. 
Jacobs:/d/vnfe1/home/jacobs:/bin/bash END vnNewUsers jacobs etc; sola vs # $1$fDg.trXL$tG/h/C14m/d/FE.Vbkdty/ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~jacobs; echo jacobsgd21@brandonu.ca > .forward; chown jacobs.other .forward; ls -al; cat .forward' ############################################################ Tue Jun 26 08:47:44 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-06-26-0847.tar.gz # 1247 1247 23693 # laplace:/usr2/people/matt/system/vnArchive # 1247 1247 23693 cd vnArchive RM -r Rtop.2001-06-26-0847 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-06-26-0847.tar.gz cd Rtop.2001-06-26-0847 mv * .. cd .. rmdir Rtop.2001-06-26-0847 cd /home/matt/system/vnshadow/Rtop.2001 RM -r ../Rtop.2001.06.O mv ../Rtop.2001.06 ../Rtop.2001.06.O mkdir ../Rtop.2001.06 LS | grep '^2001:06' | pre CP | post '../Rtop.2001.06' | csh cd /home/matt/system/vnshadow/Rtop.2001.06 vnAccount -p 20 RM -r ../vnAccount.01.06.O mv ../vnAccount.01.06 ../vnAccount.01.06.O mkdir ../vnAccount.01.06 mv `LS | grep -v '^200'` ../vnAccount.01.06 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.06.O mv vnAccount.01.06 vnAccount.01.06.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.06 . 
############################################################ Fri Jun 29 15:13:43 PDT 2001 ############################################################ Installing NCAR graphics vnii vnfeCommand 'cdi; Installz ncarg.4.2.2.linux.pgi' vnfeCommand 'cdi; Installz.PG ncarg.4.2.2.linux.pgi' # Update /etc/csh.cshrc etc vi csh.cshrc setenv NCARG_ROOT /usr/local vnDistEtc csh.cshrc # Test ncargf77 example.f -o example setenv GRAPHCAP X11 idt gmeta ncargf77 fstream.f -o fstream fstream idt gmeta ctrans -d ps.mono -outfile the.ps gmeta ############################################################ Fri Jun 29 23:41:01 PDT 2001 ############################################################ (1) Installing NCAR graphics using gnu compilers etc vi csh.cshrc setenv NCARG_ROOT /usr/local vnDistEtc vnallbgCommand 'cdi; Installz ncarg.4.2.2.linux.gnu' ############################################################ Mon Jul 2 08:57:31 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-07-02-0857.tar.gz # 940 940 17860 # laplace:/usr2/people/matt/system/vnArchive # 940 940 17860 cd vnArchive RM -r Rtop.2001-07-02-0857 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-07-02-0857.tar.gz cd Rtop.2001-07-02-0857 mv * .. cd .. 
rmdir Rtop.2001-07-02-0857 cd /home/matt/system/vnshadow/Rtop.2001 RM -r ../Rtop.2001.06.O mv ../Rtop.2001.06 ../Rtop.2001.06.O mkdir ../Rtop.2001.06 LS | grep '^2001:06' | pre CP | post '../Rtop.2001.06' | csh cd /home/matt/system/vnshadow/Rtop.2001.06 vnAccount -p 20 RM -r ../vnAccount.01.06.O mv ../vnAccount.01.06 ../vnAccount.01.06.O mkdir ../vnAccount.01.06 mv `LS | grep -v '^200'` ../vnAccount.01.06 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.06.O mv vnAccount.01.06 vnAccount.01.06.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.06 . ############################################################ Fri Jul 6 09:26:38 PDT 2001 ############################################################ (1) New Account for Sarah Overduin (Patey) nu cat<overduin overduin:x:1253:1200:Sarah Overduin:/d/vnfe2/home/overduin:/bin/csh END vnNewUsers overduin etc; sola vs # TQ1QIF/4AWr9. vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~overduin; echo soverdui@chem.ubc.ca > .forward; chown overduin.patey .forward; ls -al; cat .forward' ############################################################ Wed Jul 11 20:10:44 PDT 2001 ############################################################ (1) Scott N made himself vn9:/home/scn ssh root@vn9 'cd /home; mkdir scn; chown scn.choptuik scn; ls -lt' ############################################################ Wed Jul 11 20:13:36 PDT 2001 ############################################################ (1) New Account for Robin Hsiung Post-doc. at Department of Medical Genetics Supervisor: Dr L. 
Leigh Field - Medical Genetics nu cat<hsiung hsiung:x:9053:9000:Robin Hsiung:/d/vnfe1/home/hsiung:/bin/ksh END vnNewUsers hsiung etc; sola vs # vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~hsiung; echo hsiung@interchange.ubc.ca > .forward; chown hsiung.other .forward; ls -al; cat .forward' ############################################################ Thu Jul 12 18:21:46 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-07-12-1822.tar.gz # 1781 1781 33839 # laplace:/usr2/people/matt/system/vnArchive # 1781 1781 33839 cd vnArchive RM -r Rtop.2001-07-12-1822 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-07-12-1822.tar.gz cd Rtop.2001-07-12-1822 mv * .. cd .. rmdir Rtop.2001-07-12-1822 cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.07 LS | grep '^2001:07' | pre CP | post '../Rtop.2001.07' | csh cd /home/matt/system/vnshadow/Rtop.2001.07 vnAccount -p 25 mkdir ../vnAccount.01.07 mv `LS | grep -v '^200'` ../vnAccount.01.07 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.07 . ############################################################ Wed Jul 18 14:56:03 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-07-18-1456.tar.gz # 852 852 16188 # laplace:/usr2/people/matt/system/vnArchive # 852 852 16188 cd vnArchive RM -r Rtop.2001-07-18-1456 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-07-18-1456.tar.gz cd Rtop.2001-07-18-1456 mv * .. cd .. 
rmdir Rtop.2001-07-18-1456 cd /home/matt/system/vnshadow/Rtop.2001 mv ../Rtop.2001.07 ../Rtop.2001.07.O mkdir ../Rtop.2001.07 LS | grep '^2001:07' | pre CP | post '../Rtop.2001.07' | csh cd /home/matt/system/vnshadow/Rtop.2001.07 vnAccount -p 25 mv ../vnAccount.01.07 ../vnAccount.01.07.O mkdir ../vnAccount.01.07 mv `LS | grep -v '^200'` ../vnAccount.01.07 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.07 . ############################################################ Thu Jul 19 10:11:24 PDT 2001 ############################################################ (1) Scott points out that 'sshd' is not working on vn43, apparently /usr/local/bin/sshd has disappeared Nope ... sshd lives in /usr/local/sbin/sshd ! ############################################################ Thu Jul 19 10:51:31 PDT 2001 ############################################################ (1) Trying to track down source of huge IP traffic (axen?) tcpdump "len>=1024" | tee /tmp/dump nth 2 < dump | grep vn | grep -v vnfe1 | upto \. 
| sort | repc | sort -r -n -k 2 # Nothing immediately apparent, but axen jobs are apparently being run for # short periods of time # Cleaning up 'zming's dead jobs vnallbgCommand 'ps -elf | grep zming | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Thu Jul 19 10:51:31 PDT 2001 ############################################################ (1) Jordana wants a Pascal compiler on the cluster http://www.freepascal.org/download.html#linuxrpm # Downloaded fpc-1.0.4-1.i386.rpm fpc-1.0.4-1.src.rpm fpc-docs-1.0.4-1.i386.rpm fpc-docs-1.0.4-1.src.rpm # to matt@vnfe1:~/RPM # As root@vnfe1 cd ~matt/RPM rpm -i fpc*rpm # Compiler is called 'ppc386' # Need some sample source, but otherwise OK # Compiling *.p files in ~tzenova/tlink ppc386 tilink.p tilink.p(213,37) Warning: Local variable V does not seem to be initialized tilink.p(277,17) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(280,17) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(284,18) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(287,18) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(288,34) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(289,16) Error: Incompatible type for arg no. 2: Got SHORTSTRING, expected LONGINT tilink.p(4294,7) Error: Illegal qualifier tilink.p(4296,5) Error: Illegal qualifier tilink.p(4297,16) Error: Illegal qualifier tilink.p(4298,9) Error: Illegal qualifier tilink.p(4298,15) Error: Illegal qualifier tilink.p(4572,13) Error: Incompatible type for arg no. 
2: Got SHORTSTRING, expected LONGINT tilink.p(5869) Fatal: There were 12 errors compiling module, stopping vnallbgCommand 'cd ~matt/RPM; rpm -i fpc*rpm; cd; rehash; which ppc386' ############################################################ Fri Aug 3 12:48:44 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-08-03-1248.tar.gz # 910 910 17290 # laplace:/usr2/people/matt/system/vnArchive # 910 910 17290 cd vnArchive RM -r Rtop.2001-08-03-1248 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-08-03-1248.tar.gz cd Rtop.2001-08-03-1248 mv * .. cd .. rmdir Rtop.2001-08-03-1248 cd /home/matt/system/vnshadow/Rtop.2001 RM -r ../Rtop.2001.07.O mv ../Rtop.2001.07 ../Rtop.2001.07.O mkdir ../Rtop.2001.07 LS | grep '^2001:07' | pre CP | post '../Rtop.2001.07' | csh cd /home/matt/system/vnshadow/Rtop.2001.07 vnAccount -p 15 RM -r ../vnAccount.01.07.O mv ../vnAccount.01.07 ../vnAccount.01.07.O mkdir ../vnAccount.01.07 mv `LS | grep -v '^200'` ../vnAccount.01.07 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.07 . 
############################################################ Sat Aug 4 17:36:31 PDT 2001 ############################################################ vn48 down 3:06 # Pingable, not responsive # Hard reboot SEE README.CRASH CRASH_101 ntptimeset vnSetdate ntptimeset ############################################################ Sat Aug 11 10:00:38 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-08-11-1001.tar.gz # 1304 1304 24776 # laplace:/usr2/people/matt/system/vnArchive # 1304 1304 24776 cd vnArchive RM -r Rtop.2001-08-11-1001 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-08-11-1001.tar.gz cd Rtop.2001-08-11-1001 mv * .. cd .. rmdir Rtop.2001-08-11-1001 cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.08 LS | grep '^2001:08' | pre CP | post '../Rtop.2001.08' | csh cd /home/matt/system/vnshadow/Rtop.2001.08 vnAccount mkdir ../vnAccount.01.08 mv `LS | grep -v '^200'` ../vnAccount.01.08 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.08 . ############################################################ Tue Aug 21 15:22:03 PDT 2001 ############################################################ (1) Disabling 'tmpwatch', 'slocate.cron' daily cron jobs cd /d/vnfe1/home/matt/system/vn/image/master/etc/cron.daily scp matt@vnfe1.physics.ubc.ca:/etc/cron.daily/tmpwatch . scp matt@vnfe1.physics.ubc.ca:/etc/cron.daily/slocate.cron . 
vnallbgCommand '/bin/rm /etc/cron.daily/tmpwatch' vnallbgCommand 'ls /etc/cron.daily/tmpwatch' vnallbgCommand '/bin/rm /etc/cron.daily/slocate.cron' vnallbgCommand 'ls /etc/cron.daily/slocate.cron' ############################################################ Wed Aug 22 10:10:52 PDT 2001 ############################################################ (1) Something strange with vn11:/dev/hda1, df reports Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 12213917 9851659 1727693 85% / but Usage can only account for about 6-7 GB? 4949994 home 1472251 usr 64464 lib 60120 var 5767 sbin 5115 bin 5098 boot 3237 etc 1290 tmp 1148 root 112 dev 12 lost+found 6 opt 4 mnt 1 misc 0 proc ############################################################ Fri Aug 24 11:21:42 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-08-24-1121.tar.gz # 1679 1679 319016 # laplace:/usr2/people/matt/system/vnArchive # 1679 1679 319016 cd vnArchive RM -r Rtop.2001-08-24-1121 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-08-24-1121.tar.gz cd Rtop.2001-08-24-1121 mv * .. cd .. rmdir Rtop.2001-08-24-1121 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:08' | pre CP | post '../Rtop.2001.08' | csh cd /home/matt/system/vnshadow/Rtop.2001.08 vnAccount mv ../vnAccount.01.08 ../vnAccount.01.08.O mkdir ../vnAccount.01.08 mv `LS | grep -v '^200'` ../vnAccount.01.08 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.08 vnAccount.01.08.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.08 . 
############################################################ Wed Aug 29 17:06:54 PDT 2001 ############################################################ --> 63.192.96.2 doesn't seem to be alive # Front end /etc/ntp.conf # peer configuration for hosts vnfe[123].physics.ubc.ca # (expected to operate at stratum 3 or higher) server 132.246.168.148 server 63.192.96.2 server 192.35.82.50 driftfile /etc/ntp.drift Changing to 137.82.1.3 etc scp ntp.conf.fe root@vnfe1:/etc/ntp.conf scp ntp.conf.fe root@vnfe2:/etc/ntp.conf scp ntp.conf.fe root@vnfe3:/etc/ntp.conf ############################################################ Fri Aug 31 09:33:33 PDT 2001 ############################################################ (1) New Account for Steve Plotkin nu cat<plotkin plotkin:x:9054:9000:Steven Plotkin:/d/vnfe1/home/plotkin:/bin/bash END vnNewUsers plotkin etc; sola vs # $1$4/NayQK2$iIweX3I6Cc.c/kkuJE7o7. vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~plotkin; echo steve@physics.ubc.ca> .forward; chown plotkin.other .forward; ls -al; cat .forward' ############################################################ Sat Sep 1 20:03:27 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-09-01-2003.tar.gz # 1325 1325 251756 # laplace:/usr2/people/matt/system/vnArchive # 1325 1325 251756 cd vnArchive RM -r Rtop.2001-09-01-2003 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-09-01-2003.tar.gz cd Rtop.2001-09-01-2003 mv * .. cd .. 
rmdir Rtop.2001-09-01-2003 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:08' | pre CP | post '../Rtop.2001.08' | csh cd /home/matt/system/vnshadow/Rtop.2001.08 vnAccount mv ../vnAccount.01.08 ../vnAccount.01.08.O mkdir ../vnAccount.01.08 mv `LS | grep -v '^200'` ../vnAccount.01.08 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.08.O mv vnAccount.01.08 vnAccount.01.08.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.08 . ############################################################ Mon Sep 3 08:23:30 PDT 2001 ############################################################ (1) Compiler licensing daemon apparently died overnight # Had some problems with the following sequence (sans killall -9 lmgrd) # Ensure that lmgrd *does* shut down. vnfeCommand 'killall -9 pgroupd' vnfeCommand 'killall -9 lmgrd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit vnfeCommand 'cd TestPGI; make clean; make' ############################################################ Wed Sep 5 12:07:32 PDT 2001 ############################################################ (1) Everything down after power outage 6:30-10:15 vnfe1's second disk apparently fried, swapped in extra fdisk /dev/sdb mkfs /dev/sdb1 vnallCommand 'mount -a; df' # vnfe3's clock OK, made it master in vnSetdate vnNbgCommand 'vnSetdate; hwclock --systohc' # Everything seems to be mounted OK ############################################################ Thu Sep 13 15:44:27 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-09-13-1544.tar.gz # 1878 1878 35682 # laplace:/usr2/people/matt/system/vnArchive # 1878 1878 35682 
cd vnArchive RM -r Rtop.2001-09-13-1544 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-09-13-1544.tar.gz cd Rtop.2001-09-13-1544 mv * .. cd .. rmdir Rtop.2001-09-13-1544 cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.09 LS | grep '^2001:09' | pre CP | post '../Rtop.2001.09' | csh cd /home/matt/system/vnshadow/Rtop.2001.09 vnAccount -p 25 mkdir ../vnAccount.01.09 mv `LS | grep -v '^200'` ../vnAccount.01.09 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.09 . ############################################################ Wed Sep 19 19:50:43 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-09-19-1950.tar.gz # 939 939 17841 # laplace:/usr2/people/matt/system/vnArchive # 939 939 17841 cd vnArchive RM -r Rtop.2001-09-19-1950 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-09-19-1950.tar.gz cd Rtop.2001-09-19-1950 mv * .. cd .. rmdir Rtop.2001-09-19-1950 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:09' | pre CP | post '../Rtop.2001.09' | csh cd /home/matt/system/vnshadow/Rtop.2001.09 vnAccount -p 25 RM -r ../vnAccount.01.09 mkdir ../vnAccount.01.09 mv `LS | grep -v '^200'` ../vnAccount.01.09 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.09 vnAccount.01.09.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.09 . 
############################################################ Mon Oct 1 09:24:21 PDT 2001 ############################################################ vn43 down 7:50 see README.CRASH (CRASH_104) ############################################################ Mon Oct 1 09:28:47 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-01-0928.tar.gz # 2022 2022 38418 # laplace:/usr2/people/matt/system/vnArchive # 2022 2022 38418 cd vnArchive RM -r Rtop.2001-10-01-0928 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.2001-10-01-0928.tar.gz cd Rtop.2001-10-01-0928 mv * .. cd .. rmdir Rtop.2001-10-01-0928 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:09' | pre CP | post '../Rtop.2001.09' | csh cd /home/matt/system/vnshadow/Rtop.2001.09 vnAccount -p 25 RM -r ../vnAccount.01.09 mkdir ../vnAccount.01.09 mv `LS | grep -v '^200'` ../vnAccount.01.09 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN mv vnAccount.01.09 vnAccount.01.09.O scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.09 . 
############################################################ Fri Aug 31 09:33:33 PDT 2001 ############################################################ (1) New Account for Chad Larson nu cat<clarson clarson:x:1612:1600:Chad Larson:/d/vnfe3/home/clarson:/bin/bash END vnNewUsers clarson etc; sola vs # vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~clarson; echo clarson@mech.ubc.ca> .forward; chown clarson.other .forward; ls -al; cat .forward' ############################################################ Tue Oct 2 20:53:08 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-02-2054.tar.gz # 226 226 4294 # laplace:/usr2/people/matt/system/vnArchive # 226 226 4294 cd vnArchive RM -r Rtop.2001-10-02-2054 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 mkdir ../Rtop.2001.10 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 25 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . 
############################################################ Wed Oct 10 21:44:51 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-10-2144.tar.gz # 1425 1425 27075 # laplace:/usr2/people/matt/system/vnArchive # 1425 1425 27075 cd vnArchive RM -r Rtop.2001-10-10-2144 cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 25 RM -r ../vnAccount.01.10 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . 
############################################################ Thu Oct 11 17:37:00 PDT 2001 ############################################################ (1) New Account for Christopher Liew nu cat<cliew cliew:x:9055:9000:Chad Larson:/d/vnfe1/home/cliew:/bin/bash END vnNewUsers cliew etc; sola vs # afybPHd/PWu22 vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~cliew; echo cliew@interchange.ubc.ca> .forward; chown cliew.other .forward; ls -al; cat .forward' ############################################################ Thu Oct 18 17:42:00 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-18-1742.tar.gz # 1147 1147 21793 # laplace:/usr2/people/matt/system/vnArchive # 1147 1147 21793 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 20 RM -r ../vnAccount.01.10 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . 
############################################################ Tue Oct 23 07:59:26 PDT 2001 ############################################################ (1) NFS mounting bh2:/home on cluster vnallbgCommand 'mkdir -p /d/bh2/home' vnallbgCommand 'echo bh2:/home /d/bh2/home nfs rw,bg,hard,intr 0 0 >> /etc/fstab' ############################################################ Wed Oct 24 22:24:08 PDT 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-24-2224.tar.gz # 1005 1005 19095 # laplace:/usr2/people/matt/system/vnArchive # 1005 1005 19095 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 20 RM -r ../vnAccount.01.10 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . ############################################################ Tue Oct 30 20:37:57 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-10-30-2038.tar.gz # 1038 1038 19722 # laplace:/usr2/people/matt/system/vnArchive # 1038 1038 19722 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 15 RM -r ../vnAccount.01.10 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . ############################################################ Thu Nov 1 20:24:16 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-11-01-2026.tar.gz # 301 301 5719 # laplace:/usr2/people/matt/system/vnArchive # 301 301 5719 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:10' | pre CP | post '../Rtop.2001.10' | csh cd /home/matt/system/vnshadow/Rtop.2001.10 vnAccount -p 15 RM -r ../vnAccount.01.10 mkdir ../vnAccount.01.10 mv `LS | grep -v '^200'` ../vnAccount.01.10 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.10 . 
############################################################ Sun Nov 4 08:52:37 PST 2001 ############################################################ vn4 down 17:35 SEE README.CRASH (CRASH_105) ############################################################ Fri Nov 9 10:02:20 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-11-09-1002.tar.gz # 1132 1132 21508 # laplace:/usr2/people/matt/system/vnArchive # 1132 1132 21508 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} mkdir /home/matt/system/vnshadow/Rtop.2001.11 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:11' | pre CP | post '../Rtop.2001.11' | csh cd /home/matt/system/vnshadow/Rtop.2001.11 vnAccount -p 20 mkdir ../vnAccount.01.11 mv `LS | grep -v '^200'` ../vnAccount.01.11 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.11 . ############################################################ Tue Nov 20 20:30:41 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-11-20-2030.tar.gz # 1822 1822 34618 # laplace:/usr2/people/matt/system/vnArchive # 1822 1822 34618 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:11' | pre CP | post '../Rtop.2001.11' | csh cd /home/matt/system/vnshadow/Rtop.2001.11 vnAccount -p 20 RM -r ../vnAccount.01.11 mkdir ../vnAccount.01.11 mv `LS | grep -v '^200'` ../vnAccount.01.11 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.11 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.11 . ############################################################ Wed Nov 21 18:01:49 PST 2001 ############################################################ (1) New Account for Andrea Frisque nu cat<andrea andrea:x:1613:1600:Andrea Frisque:/d/vnfe3/home/andrea:/bin/tcsh END vnNewUsers andrea etc; sola vs # gQckZVYDECRvQ vnDistEtc shadow # Set .forward ssh root@vnfe3 'cd ~andrea; echo andrea@mech.ubc.ca> .forward; chown andrea.bushe .forward; ls -al; cat .forward' ############################################################ Mon Nov 26 19:51:56 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-11-26-1952.tar.gz # 1006 1006 19114 # laplace:/usr2/people/matt/system/vnArchive # 1006 1006 19114 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:11' | pre CP | post '../Rtop.2001.11' | csh cd /home/matt/system/vnshadow/Rtop.2001.11 vnAccount -p 20 RM -r ../vnAccount.01.11 mkdir ../vnAccount.01.11 mv `LS | grep -v '^200'` ../vnAccount.01.11 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.11 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.11 . 
############################################################ Tue Nov 27 12:17:10 PST 2001 ############################################################ (1) New Account for Mario Pineda-Krch (postdoc with Sally Otto) nu cat<pineda pineda:x:9056:9000:Mario Pineda-Krch:/vnfe1/home/pineda:/bin/bash END vnNewUsers pineda etc; sola vs # $1$yZiyzzwf$PQd7k.TEtnTTdHHKYxXOh0 vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~pineda; echo pineda@zoology.ubc.ca> .forward; chown pineda.other .forward; ls -al; cat .forward' ############################################################ Tue Nov 27 13:21:06 PST 2001 ############################################################ (1) New Account for Lin Wen (Bushe grad student) nu cat<linwen linwen:x:1614:1600:Lin Wen:/d/vnfe3/home/linwen:/bin/bash END vnNewUsers linwen etc; sola vs # yK69Z9iobC2es vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~linwen; echo linwen@hotmail.com> .forward; chown linwen.bushe .forward; ls -al; cat .forward' ############################################################ Fri Nov 30 15:30:10 PST 2001 ############################################################ (1) Archiving some of the Rtop files to CD-ROM http://www.linuxdoc.org/HOWTO/CD-Writing-HOWTO-3.html#ss3.1 # As root@bh0 mkdir -p /var/tmp/cdrom cd /var/tmp/cdrom mkisofs -r -o vnArchive /d/laplace/usr2/people/matt/system/vnArchive mkdir /mnt/cdrom-soft # Will delay for time being since actually don't have that much data mount -t iso9660 -o ro,loop=/dev/loop0 vnArchive /mnt/cdrom-soft ############################################################ Sat Dec 1 20:07:26 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-12-01-2007.tar.gz # 797 797 15143 # laplace:/usr2/people/matt/system/vnArchive # 797 797 15143 cd vnArchive RM -r Rtop.${TS} cd 
/home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:11' | pre CP | post '../Rtop.2001.11' | csh cd /home/matt/system/vnshadow/Rtop.2001.11 vnAccount -p 20 RM -r ../vnAccount.01.11 mkdir ../vnAccount.01.11 mv `LS | grep -v '^200'` ../vnAccount.01.11 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.11 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.11 . ############################################################ Tue Dec 4 08:03:30 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-12-04-0803.tar.gz # 414 414 7866 # laplace:/usr2/people/matt/system/vnArchive # 414 414 7866 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} mkdir /home/matt/system/vnshadow/Rtop.2001.12 cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:12' | pre CP | post '../Rtop.2001.12' | csh cd /home/matt/system/vnshadow/Rtop.2001.12 vnAccount -p 20 mkdir ../vnAccount.01.12 mv `LS | grep -v '^200'` ../vnAccount.01.12 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.12 . 
############################################################ Fri Dec 7 11:45:58 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-12-07-1146.tar.gz # 520 520 9880 # laplace:/usr2/people/matt/system/vnArchive # 520 520 9880 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:12' | pre CP | post '../Rtop.2001.12' | csh cd /home/matt/system/vnshadow/Rtop.2001.12 vnAccount -p 20 RM -r ../vnAccount.01.12 mkdir ../vnAccount.01.12 mv `LS | grep -v '^200'` ../vnAccount.01.12 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.12 . ############################################################ Thu Dec 13 16:04:22 PST 2001 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2001-12-13-1604.tar.gz # 1017 1017 19323 # laplace:/usr2/people/matt/system/vnArchive # 1017 1017 19323 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv * .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:12' | pre CP | post '../Rtop.2001.12' | csh cd /home/matt/system/vnshadow/Rtop.2001.12 vnAccount -p 20 RM -r ../vnAccount.01.12 mkdir ../vnAccount.01.12 mv `LS | grep -v '^200'` ../vnAccount.01.12 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.12 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.12 . 
############################################################ Fri Dec 14 17:58:33 PST 2001 ############################################################ (1) Wind storm last night with gusts to 100 km/h multiple power outages from 3AM - 3PM TO RESTART COMPILERS vnfeCommand 'killall -9 lmgrd; killall -9 pgroupd' vnfeCommand 'killall -9 lmgrd; killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ############################################################ Tue Dec 18 09:56:09 PST 2001 (Happy Birthday PWC!) ############################################################ (1) vn51 down (incommunicado) about 2:00 AM see README.CRASH (CRASH_108) ############################################################ Tue Jan 1 08:57:59 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-01-01-0858.tar.gz # 3276 3276 62244 # laplace:/usr2/people/matt/system/vnArchive # 3276 3276 62244 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2001* .. mkdir /home/matt/system/vnshadow/Rtop.2002 cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2001 LS | grep '^2001:12' | pre CP | post '../Rtop.2001.12' | csh cd /home/matt/system/vnshadow/Rtop.2001.12 vnAccount -p 20 RM -r ../vnAccount.01.12 mkdir ../vnAccount.01.12 mv `LS | grep -v '^200'` ../vnAccount.01.12 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.01.12 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.01.12 . 
############################################################ Sun Jan 13 10:40:09 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-01-13-1040.tar.gz # 2183 2183 41477 # laplace:/usr2/people/matt/system/vnArchive # 2183 2183 41477 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:01' | pre CP | post '../Rtop.2002.01' | csh cd /home/matt/system/vnshadow/Rtop.2002.01 vnAccount -p 20 mkdir ../vnAccount.02.01 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.01 . ############################################################ Tue Jan 15 10:42:54 PST 2002 ############################################################ (1) New Account for Chi-Hsien Chen (Sossi postdoc) nu cat<chichen chichen:x:9057:9000:Chi-Hsien Chen:/d/vnfe1/home/chichen:/bin/bash END vnNewUsers chichen etc; sola vs # vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~chichen; echo chichen@pet.ubc.ca> .forward; chown chichen.other .forward; ls -al; cat .forward' ############################################################ Wed Jan 16 16:49:09 PST 2002 ############################################################ (1) New Account for Sune Norhoj Jespersen (Plischke postdoc, SFU) nu cat<sjespers sjespers:x:9058:9000:Sune Norhoj Jespersen:/d/vnfe1/home/sjespers:/bin/bash END vnNewUsers sjespers etc; sola vs # $1$Q2WIr.xk$AbhLepPY/TADhY/beGHH4/ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~sjespers; echo sjespers@sfu.ca> .forward; chown sjespers.other .forward; ls -al; cat .forward' 
############################################################ Sun Jan 20 08:48:48 PST 2002 ############################################################ (1) vn62 down see README.CRASH (CRASH_109) ############################################################ Sun Jan 20 09:40:31 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-01-20-0940.tar.gz # 1212 1212 23028 # laplace:/usr2/people/matt/system/vnArchive # 1212 1212 23028 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:01' | pre CP | post '../Rtop.2002.01' | csh cd /home/matt/system/vnshadow/Rtop.2002.01 vnAccount -p 20 RM -rf ../vnAccount.02.01 mkdir ../vnAccount.02.01 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.01 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.01 . ############################################################ Sun Jan 27 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-01-27-0754.tar.gz # 1281 1281 24339 # laplace:/usr2/people/matt/system/vnArchive # 1281 1281 24339 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:01' | pre CP | post '../Rtop.2002.01' | csh cd /home/matt/system/vnshadow/Rtop.2002.01 vnAccount -p 20 RM -rf ../vnAccount.02.01 mkdir ../vnAccount.02.01 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.01 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.01 . ############################################################ Wed Jan 30 10:29:25 PST 2002 ############################################################ (1) Mounting bh0:/home on vnfe1 # As matt@vnfe1 etc # fstab.vnfe1 bh0:/home /d/bh0/home nfs rw,bg,hard,intr 0 0 laplace:/usr2 /d/laplace/usr2 nfs rw,bg,hard,intr 0 0 scp fstab.vnfe1 root@vnfe1:/etc/fstab # As matt@bh0 etcbh # exports.bh0 /home vn*.physics.ubc.ca(rw,no_root_squash) bh*.physics.ubc.ca(rw,no_root_squash) lnx*.physics.ubc.ca(rw,no_root_squash) laplace.physics.ubc.ca(rw,no_root_squash) godel.physics.ubc.ca(rw,no_root_squash) /mnt/cdrom laplace.physics.ubc.ca(rw,no_root_squash) godel.physics.ubc.ca(rw,no_root_squash) sgi1.physics.ubc.ca(rw,no_root_squash) sgi2.physics.ubc.ca(rw,no_root_squash) scp exports.bh0 root@bh0:/etc/exports ssh root@bh0 'exportfs -av' # As root@vnfe1 mkdir -p /d/bh0/home mount -a ############################################################ Wed Jan 30 11:06:08 PST 2002 ############################################################ (1) Dave Jones reports fan noise from vn48, take front cover off, tap front plate, noise diminishes, will leave for time being ############################################################ Fri Feb 1 07:05:04 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-02-01-0705.tar.gz # 939 939 17841 # laplace:/usr2/people/matt/system/vnArchive # 939 939 17841 
cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:01' | pre CP | post '../Rtop.2002.01' | csh cd /home/matt/system/vnshadow/Rtop.2002.01 vnAccount -p 20 RM -rf ../vnAccount.02.01 mkdir ../vnAccount.02.01 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.01 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.01 . ############################################################ Fri Feb 8 07:05:04 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-02-08-1352.tar.gz # 1429 1429 27151 # laplace:/usr2/people/matt/system/vnArchive # 1429 1429 27151 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.02 LS | grep '^2002:02' | pre CP | post '../Rtop.2002.02' | csh cd /home/matt/system/vnshadow/Rtop.2002.02 vnAccount -p 20 RM -rf ../vnAccount.02.02 mkdir ../vnAccount.02.02 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.02 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.02 . 
############################################################ Thu Feb 14 10:43:53 PST 2002 ############################################################ (1) New Account for Mark Boyland (Fred Bunnell, Forest Sciences) nu cat<mboyland mboyland:x:9059:9000:Mark Boyland:/d/vnfe1/home/mboyland:/bin/bash END vnNewUsers mboyland etc; sola vs # IYy/.D04KbedQ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~mboyland; echo markbo@unixg.ubc.ca> .forward; chown mboyland.other .forward; ls -al; cat .forward' Hi Mark: Happy to set you up an account. So that I can complete the administrivia, please forward an encrypted password entry from /etc/passwd or /etc/shadow from a local Unix machine. Your password on the cluster will then be clone of the one on the local machine. Regarding Java, the news isn't great I'm afraid. We haven't completely upgraded the OS since the system was installed primarily since there isn't any compelling reason to do so when you're running compiled code, and since upgrading is almost always downgrading on a heavily used machine, in that the CPU time you lose doing the upgrading, then fixing all the problems which appear, is seldom recovered by the users. If you can get your updated JRE working with the OS as it is now, then great; in fact, if it turns out to be straightforward then we'd probably even want to install ourselves. I'm just not overly optimistic since Doug James, a grad student from Math, had similar aspirations a while back, and we just couldn't upgrade to the point that he needed. Cheers ... Matt Hi Matt, Thanks, I've attached my etc/shadow file. I'm hoping this is what you need, it has my password down the bottom -- user: mboyland. On the Java front, I _think_ it can be done fairly easily. At least, on local machines it's easy, I don't know what difficulties the cluster distribution might pose. On a local machine I simply install a standard rpm (IBMJava2-JRE-1.3-10.0i386.rpm works well). This installs by default to /opt/IBM... 
which is nice -- it won't overwrite the Kaffe JRE. Of course, this doesn't actually do anything useful without updating the PATH variable in your shell of choice (.bashrc or .bash_profile for the BASH shell). The OS shouldn't need to be altered. If you would like to try installing it, the IBM JRE is downloadable from http://www6.software.ibm.com/dl/dklx130/dklx130-i (where prostration by registration to the IBM gods is required) or by ftp here at: atlas.forestry.ubc.ca user: atlas1 pwd: atlas-1 (those are one's, not the letter 'l'); navigate to: ATLAS/outgoing/markbo/IBMJRE/IBMJava2-JRE-1.3-10.0i386.rpm ciao, mark # As matt@vnfe1 cd RPMS scp matt@bh0.physics.ubc.ca:/d/bh0/home/matt/RPMS/IBMJava2-JRE-1.3-10.0.i386.rpm . # As root@vnfe1 rpm -e `rpm -qa | grep IBM` rpm -ivh /d/vnfe1/home/matt/RPMS/IBMJava2-JRE-1.3-10.0.i386.rpm vnallbgCommand 'rpm -e `rpm -qa | grep IBM`' vnallbgCommand 'rpm -ivh /d/vnfe1/home/matt/RPMS/IBMJava2-JRE-1.3-10.0.i386.rpm' I'm sorry to bother you again so soon. I have one further install request. Sun's JRE. It's at: java.sun.com/j2se/1.4/download.html (the Linux RPM shell script works well) or from my ftp at: atlas/forestry.ubc.ca navigate to: /Atlas/outgoin/markbo/SUNJRE/j2re-1_4_0-linux-i386-rpm.bin user: atlas1 pswd: atlas -1 (those are ones, not the letter "l") thank you Mark # As matt@vnfe1 cd RPMS scp matt@bh0.physics.ubc.ca:/d/bh0/home/matt/RPMS/j2re-1_4_0-linux-i386-rpm.bin . 
echo "yes" | sh j2re-1_4_0-linux-i386-rpm.bin # As root@vnfe1 rpm -ivh /d/vnfe1/home/matt/RPMS/j2re-1_4_0-fcs-linux-i386.rpm # As matt@vnfe1 vnallbgCommand 'rpm -ivh /d/vnfe1/home/matt/RPMS/j2re-1_4_0-fcs-linux-i386.rpm' vnallCommand 'rpm -qa | grep j2re' > /tmp/j2re foreach i (5 6 20 25 43) ssh root@vn${i} 'rpm -Uvh /d/vnfe1/home/matt/RPMS/glibc-2.1.3-18.2mdk.i586.rpm' end foreach i (5 6 20 25 43) ssh root@vn${i} 'rpm -ivh /d/vnfe1/home/matt/RPMS/j2re-1_4_0-fcs-linux-i386.rpm' end vnallCommand 'rpm -qa | grep j2re' > /tmp/j2re ############################################################ Sat Feb 16 23:04:36 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-02-16-2304.tar.gz # 1636 1636 31084 # laplace:/usr2/people/matt/system/vnArchive # 1636 1636 31084 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:02' | pre CP | post '../Rtop.2002.02' | csh cd /home/matt/system/vnshadow/Rtop.2002.02 vnAccount -p 20 RM -rf ../vnAccount.02.02 mkdir ../vnAccount.02.02 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.02 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.02 . 
############################################################ Tue Feb 19 15:12:17 PST 2002 ############################################################ (1) Screwed up creation of Mark Boyland's account, gave him uid 9058 (sjespers) instead of 9059 etc vi /etc/passwd mboyland:!:9058:9000:Mark Boyland:/d/vnfe1/home/mboyland:/bin/bash mboyland:!:9059:9000:Mark Boyland:/d/vnfe1/home/mboyland:/bin/bash vnDistEtc passwd # As root@vnfe1 cd ~mboyland chown -R mboyland * .??* ############################################################ Wed Feb 20 08:00:43 PST 2002 ############################################################ (1) Chi Chen has filled up vnfe1:/home ############################################################ Wed Feb 20 13:52:53 PST 2002 ############################################################ (1) Scott reports following problems with vn23 and vn33 Hey Matt, while checking up on some "by-hand" crit. searches, I found two problems: 1) on vn23: when I do this: prompt> sdftosv -i '1-*/4' g1.5rhoc=0.01tmr_0.sdf I get the following error message and the sdf file is not displayed: Out of memory error in vec_alloc! The only "big" process that was running on vn23 at the time (about five minutes ago) was my 'idealss' process, which was taking about 2.1% of the memory. Doing the same command with the same file on bh6 works fine with no error messages. The file is located at : /d/bh2/home/scn/g2/vprofile/rhoc_vp=0.02/g1.5rhoc=0.01tmr_0.sdf 2) on vn33: my code "Segmentation Fault"'s on this node, but no others (as far as I know, i've only tested a few random nodes). The following is the "strace" of the execution (i.e. 
'strace idealss input_file') vn33 : g2/vprofile/rhoc_vp=0.01: ./idealss input_crit Segmentation fault vn33 : g2/vprofile/rhoc_vp=0.01: strace ./idealss input_crit execve("./idealss", ["./idealss", "input_crit"], [/* 63 vars */]) = 0 brk(0) = 0xdc84a88 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aabe000 open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=46022, ...}) = 0 mmap(NULL, 46022, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2aabf000 close(3) = 0 open("/lib/libm.so.6", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0755, st_size=540397, ...}) = 0 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\220F\0"..., 4096) = 4096 mmap(NULL, 120984, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aacb000 mprotect(0x2aae8000, 2200, PROT_NONE) = 0 mmap(0x2aae8000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x1c000) = 0x2aae8000 close(3) = 0 open("/lib/libc.so.6", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0755, st_size=5257684, ...}) = 0 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0H\215\1"..., 4096) = 4096 mmap(NULL, 945276, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x2aae9000 mprotect(0x2abc8000, 31868, PROT_NONE) = 0 mmap(0x2abc8000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0xde000) = 0x2abc8000 mmap(0x2abcd000, 11388, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2abcd000 close(3) = 0 munmap(0x2aabf000, 46022) = 0 getpid() = 20414 --- SIGSEGV (Segmentation fault) --- +++ killed by SIGSEGV +++ From scn@warp.physics.ubc.ca Wed Feb 20 11:31:05 2002 Received: from warp.physics.ubc.ca (warp.physics.ubc.ca [142.103.236.11]) by laplace.physics.ubc.ca (8.9.3/8.9.3) with ESMTP id LAA3716867 for ; Wed, 20 Feb 2002 11:31:04 -0800 (PST) Received: (from scn@localhost) by warp.physics.ubc.ca (8.11.3/8.11.3) id g1KJVfZ20965 for matt@laplace.physics.ubc.ca; Wed, 20 Feb 2002 11:31:41 -0800 (PST) From: 
Scott Noble Message-Id: <200202201931.g1KJVfZ20965@warp.physics.ubc.ca> Subject: node problems (fwd) To: matt@laplace.physics.ubc.ca (Matt Choptuik) Date: Wed, 20 Feb 2002 11:31:41 -0800 (PST) X-Mailer: ELM [version 2.5 PL3] MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Status: R Regarding the "vec_alloc" problem: I 'strace'-ed the sdftosv command and the error message seems to be coming from 'rt_sigsuspend()' : rt_sigsuspend([INT RT_2]Out of memory error in vec_alloc! --- SIGCHLD (Child exited) --- I don't know what this is... does it ring a bell for you? thanks, scott n. > > 1) on vn23: when I do this: > > prompt> sdftosv -i '1-*/4' g1.5rhoc=0.01tmr_0.sdf > > I get the following error message and the sdf file is not displayed: > > Out of memory error in vec_alloc! > > The only "big" process that was running on vn23 at the time (about > five minutes ago) was my 'idealss' process, which was taking about 2.1% of the > memory. Doing the same command with the same file on bh6 works fine with > no error messages. 
The file is located at : > /d/bh2/home/scn/g2/vprofile/rhoc_vp=0.02/g1.5rhoc=0.01tmr_0.sdf # There were several of Mark Boyland's java tasks running 15725 sjespers 0 0 1016M 396M 2272 S 396M 0.0 78.5 0:00 java 15726 sjespers 0 0 1016M 396M 2272 S 396M 0.0 78.5 0:00 java 15727 sjespers 0 0 1016M 396M 2272 S 396M 0.0 78.5 0:00 java # Killed them, apparently got functionality back vn23 vn34 vn53 ############################################################ Sat Feb 23 07:18:22 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-02-23-0718.tar.gz # 1211 1211 23009 # laplace:/usr2/people/matt/system/vnArchive # 1211 1211 23009 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} mv 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:02' | pre CP | post '../Rtop.2002.02' | csh cd /home/matt/system/vnshadow/Rtop.2002.02 vnAccount -p 20 RM -rf ../vnAccount.02.02 mkdir ../vnAccount.02.02 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.02 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.02 . ############################################################ Tue Mar 5 19:17:37 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-03-05-1917.tar.gz # 1571 1571 29849 # laplace:/usr2/people/matt/system/vnArchive # 1571 1571 29849 cd vnArchive RM -r Rtop.${TS} setenv TS 2002-03-05-1917 cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:02' | pre CP | post '../Rtop.2002.02' | csh cd /home/matt/system/vnshadow/Rtop.2002.02 vnAccount -p 20 RM -rf ../vnAccount.02.02 mkdir ../vnAccount.02.02 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.02 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.02 . ############################################################ Fri Mar 8 13:28:55 PST 2002 ############################################################ (1) New Account for Yaoguo Fan (Salcudean) nu cat<yfan yfan:x:1828:1800:Yaoguo Fan:/d/vnfe3/home/yfan:/bin/tcsh END vnNewUsers yfan etc; sola vs # V..aj42Np1EQU vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~yfan; echo fan_user@yahoo.com > .forward; chown yfan.salcudean .forward; ls -al; cat .forward' ############################################################ Wed Mar 13 11:06:15 PST 2002 ############################################################ (1) New Account for Dae-Il Choi nu cat<dale dale:x:1155:600:Dae-Il (Dale) Choi:/d/vnfe1/home/dale:/bin/tcsh END vnNewUsers dale etc; sola vs # Aa2f/vy4753i. 
vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~dale; echo choi@godel.gsfc.nasa.gov > .forward; chown dale.choptuik .forward; ls -al; cat .forward' ############################################################ Thu Mar 14 08:44:08 PST 2002 ############################################################ (1) New Account for Kassim Olawale (Grad Stud, ECE, Casas/Donaldson supers) nu cat<kassimo kassimo:x:9060:9000:Kassim Olawale:/d/vnfe3/home/kassimo:/bin/tcsh END vnNewUsers kassimo etc; sola vs # Ah9srsRkV2Q0E vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~kassimo; echo kassimo@ece.ubc.ca > .forward; chown kassimo.others .forward; ls -al; cat .forward' ############################################################ Thu Mar 14 08:50:53 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-03-14-0851.tar.gz # 1605 1605 30495 # laplace:/usr2/people/matt/system/vnArchive # 1605 1605 30495 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.03 LS | grep '^2002:03' | pre CP | post '../Rtop.2002.03' | csh cd /home/matt/system/vnshadow/Rtop.2002.03 vnAccount -p 20 mkdir ../vnAccount.02.03 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.03 . 
############################################################ Sat Mar 16 06:52:44 PST 2002 ############################################################ (1) Adding NFS mount for Chi cat /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.cooperon cooperon.physics.ubc.ca:/export/data24/cooperon /d/vnfe1/home/chichen/data24 nfs rw,bg,hard,intr 0 0 vnallbgCommand 'cat /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.cooperon >> /etc/fstab' vnallbgCommand 'mount -a' ############################################################ Wed Mar 20 09:57:04 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-03-20-0957.tar.gz # 675 675 12825 # laplace:/usr2/people/matt/system/vnArchive # 675 675 12825 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:03' | pre CP | post '../Rtop.2002.03' | csh cd /home/matt/system/vnshadow/Rtop.2002.03 vnAccount -p 20 RM -r ../vnAccount.02.03 mkdir ../vnAccount.02.03 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.03 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.03 . 
############################################################ Sat Mar 30 16:10:50 PST 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-03-30-1611.tar.gz # 1807 1807 34333 # laplace:/usr2/people/matt/system/vnArchive # 1807 1807 34333 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:03' | pre CP | post '../Rtop.2002.03' | csh cd /home/matt/system/vnshadow/Rtop.2002.03 vnAccount -p 20 RM -r ../vnAccount.02.03 mkdir ../vnAccount.02.03 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.03 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.03 . ############################################################ Wed Apr 3 14:00:02 PST 2002 ############################################################ (1) New Account for Luis D'Afonseca nu cat<akiles akiles:x:13204:600:Luis D'Afonseca:/d/vnfe1/home/akiles:/bin/tcsh END vnNewUsers akiles etc; sola vs # QtitsU.AHaImo vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~akiles; echo akiles@ime.unicamp.br > .forward; chown akiles.choptuik .forward; ls -al; cat .forward' ############################################################ Mon Apr 8 20:19:00 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-04-08-2020.tar.gz # 1637 1637 31103 # laplace:/usr2/people/matt/system/vnArchive # 1637 1637 31103 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf 
../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:03' | pre CP | post '../Rtop.2002.03' | csh cd /home/matt/system/vnshadow/Rtop.2002.03 vnAccount -p 20 RM -r ../vnAccount.02.03 mkdir ../vnAccount.02.03 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.04 LS | grep '^2002:04' | pre CP | post '../Rtop.2002.04' | csh cd /home/matt/system/vnshadow/Rtop.2002.04 vnAccount -p 20 mkdir ../vnAccount.02.04 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.03 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.03 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.04 . ############################################################ Wed Apr 10 13:03:16 PDT 2002 ############################################################ (1) New Account for Markus Rumpfkeil nu cat<mpr mpr:x:12604:600:Markus Rumpfkeil:/d/vnfe1/home/mpr:/bin/tcsh END vnNewUsers mpr etc; sola vs # P6pbgeWoH7gmw vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~mpr; echo Rumpfkeil@gmx.de > .forward; chown mpr.choptuik .forward; ls -al; cat .forward' ############################################################ Thu Apr 11 08:53:09 PDT 2002 ############################################################ (1) vn43 down (frans running v. large memory job) 118: vn43 5 root 20 0 0 0 0 RW 0 28.6 0.0 25:52 kswapd 119: vn43 23461 fransp 20 0 428M 424M 812 R 0 19.7 84.0 24:15 graxi_ad_F 2002:04:10:1633.13 SEE README.CRASH CRASH_110 Kernel error, has happened several times before with this node SWAP OUT MEMORY NEXT TIME (ORDER MEMORY FROM BILL?) 
############################################################ Sat Apr 13 07:17:50 PDT 2002 ############################################################ (1) New Account for Bruno Rousseau cd vn Arc README* vi README.USERS nu cat<rousseab rousseab:x:417:600:Bruno Rousseab:/d/vnfe1/home/rousseab:/bin/tcsh END vnNewUsers rousseab etc; sola vs # 5wZBqW6h4DvrQ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~rousseab; echo rousseab@physics.ubc.ca > .forward; chown rousseab.choptuik .forward; ls -al; cat .forward' ############################################################ Fri Apr 19 09:05:10 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-04-19-0905.tar.gz # 1677 1677 31863 # laplace:/usr2/people/matt/system/vnArchive # 1677 1677 31863 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:04' | pre CP | post '../Rtop.2002.04' | csh cd /home/matt/system/vnshadow/Rtop.2002.04 vnAccount -p 20 RM -rf ../vnAccount.02.04 mkdir ../vnAccount.02.04 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.04 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.04 . 
############################################################ Tue Apr 23 15:37:57 PDT 2002 ############################################################ (1) New Account for Eric Hayashi cd vn Arc README* vi README.USERS nu cat<ehayashi ehayashi:x:9061:9000:Eric Hayashi:/d/vnfe1/home/ehayashi:/bin/tcsh END vnNewUsers ehayashi etc; sola vs # lkn88kz3pBDFc vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~ehayashi; echo ehayashi@beluga.phys.uvic.ca > .forward; chown ehayashi.other .forward; ls -al; cat .forward' ############################################################ Fri Apr 26 09:00:55 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-04-26-0901 # 1335 1335 25365 # laplace:/usr2/people/matt/system/vnArchive # 1335 1335 25365 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:04' | pre CP | post '../Rtop.2002.04' | csh cd /home/matt/system/vnshadow/Rtop.2002.04 vnAccount -p 20 RM -rf ../vnAccount.02.04 mkdir ../vnAccount.02.04 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.04 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.04 . 
############################################################ Fri May 3 18:08:07 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-05-03-1809 # 1361 1361 25859 # laplace:/usr2/people/matt/system/vnArchive # cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:04' | pre CP | post '../Rtop.2002.04' | csh mkdir ../Rtop.2002.05 LS | grep '^2002:05' | pre CP | post '../Rtop.2002.05' | csh cd /home/matt/system/vnshadow/Rtop.2002.04 vnAccount -p 20 RM -rf ../vnAccount.02.04 mkdir ../vnAccount.02.04 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002.05 vnAccount -p 20 mkdir ../vnAccount.02.05 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.04 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.04 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.05 . ############################################################ Tue May 7 23:41:08 PDT 2002 ############################################################ (1) vn9 required reboot, eventually leading to memory replacement. 
See README.CRASH (CRASH_111) ############################################################ Tue May 7 23:41:08 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-05-12-0836 # 1435 1435 27265 # laplace:/usr2/people/matt/system/vnArchive # 1435 1435 27265 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:05' | pre CP | post '../Rtop.2002.05' | csh cd /home/matt/system/vnshadow/Rtop.2002.05 vnAccount -p 20 RM -rf ../vnAccount.02.05 mkdir ../vnAccount.02.05 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.05 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.05 . 
############################################################ Mon May 13 14:48:01 PDT 2002 ############################################################ (1) New Account for Richard Mar cd vn Arc README* vi README.USERS nu cat<rmar rmar:x:9062:9000:Richard Mar:/d/vnfe1/home/rmar:/bin/bash END vnNewUsers rmar etc; sola vs # Vty3Vh4N6Ug4A vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~rmar; echo rmar@physics.ubc.ca > .forward; chown rmar.other .forward; ls -al; cat .forward' ############################################################ Mon May 20 16:30:41 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-05-20-1631 # 954 954 18126 # laplace:/usr2/people/matt/system/vnArchive # 954 954 18126 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:05' | pre CP | post '../Rtop.2002.05' | csh cd /home/matt/system/vnshadow/Rtop.2002.05 vnAccount -p 20 RM -rf ../vnAccount.02.05 mkdir ../vnAccount.02.05 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.05 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.05 . 
############################################################ Thu May 30 09:59:52 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-05-30-1000 # 1845 1845 35055 # laplace:/usr2/people/matt/system/vnArchive # 1845 1845 35055 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:05' | pre CP | post '../Rtop.2002.05' | csh cd /home/matt/system/vnshadow/Rtop.2002.05 vnAccount -p 20 RM -rf ../vnAccount.02.05 mkdir ../vnAccount.02.05 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.05 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.05 . ############################################################ Tue Jun 4 12:00:26 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-06-04-1200 # 970 970 18430 # laplace:/usr2/people/matt/system/vnArchive # 970 970 18430 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:05' | pre CP | post '../Rtop.2002.05' | csh mkdir ../Rtop.2002.06 LS | grep '^2002:06' | pre CP | post '../Rtop.2002.06' | csh cd /home/matt/system/vnshadow/Rtop.2002.05 vnAccount -p 20 RM -rf ../vnAccount.02.05 mkdir ../vnAccount.02.05 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002.06 vnAccount -p 25 RM -rf ../vnAccount.02.06 mkdir ../vnAccount.02.06 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.05 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.05 . RM -r vnAccount.02.06 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.06 . ############################################################ Wed Jun 5 11:22:38 PDT 2002 ############################################################ (1) New Account for Sina Tootoonian cd vn Arc README* vi README.USERS nu cat<sinat sinat:x:12943:600:Sina Tootoonian:/d/vnfe1/home/sinat:/bin/tcsh END vnNewUsers sinat etc; sola vs # g.XfUWzF6llgg vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~sinat; echo achillesofpersis@hotmail.com > .forward; chown sinat.choptuik .forward; ls -al; cat .forward' ############################################################ Wed Jun 5 14:18:54 PDT 2002 ############################################################ (1) New Account for Ray Grout cd vn Arc README* vi README.USERS nu cat<rgrout END vnNewUsers rgrout etc; sola vs # gB4nnD.pc8DLg vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~rgrout; echo rgrout@mech.ubc.ca > .forward; chown rgrout.bushe .forward; ls -al; cat .forward' ############################################################ Sun Jun 9 09:07:48 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip 
Rtop.${TS}.tar # Rtop.2002-06-09-0908 # 903 903 17157 # laplace:/usr2/people/matt/system/vnArchive # 903 903 17157 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:06' | pre CP | post '../Rtop.2002.06' | csh cd /home/matt/system/vnshadow/Rtop.2002.06 vnAccount -p 25 RM -rf ../vnAccount.02.06 mkdir ../vnAccount.02.06 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.06 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.06 . ############################################################ Mon Jun 17 20:08:19 PDT 2002 ############################################################ (1) PGI license daemon needed re-starting vnfeCommand 'killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ############################################################ Thu Jun 20 10:35:26 PDT 2002 ############################################################ (1) Installing 'sdftoxvs' # As root@bh1 cdi scp -q matt@laplace.physics.ubc.ca:/d/laplace/usr2/people/matt/system/bh/image/master/install/Installz.static . Installz.static xvs # As root@laplace cdftp mkdir xvs.linux.86 chown matt.choptuik xvs.linux.86 # As root@bh1 cd /var/tmp/install.static/xvs/src scp sdftoxvs xv1 xvn root@laplace.physics.ubc.ca:/d/laplace/usr2/people/ftp/pub/xvs.linux.86 # As matt@vnfe1 cd bin scp -q root@laplace.physics.ubc.ca:/d/laplace/usr2/people/ftp/pub/xvs.linux.86/sdftoxvs . scp -q root@laplace.physics.ubc.ca:/d/laplace/usr2/people/ftp/pub/xvs.linux.86/xv1 . scp -q root@laplace.physics.ubc.ca:/d/laplace/usr2/people/ftp/pub/xvs.linux.86/xvn . 
vnallbgCommand 'cd /usr/local/bin; CP /d/vnfe1/home/matt/bin/sdftoxvs .' vnallbgCommand 'cd /usr/local/bin; CP /d/vnfe1/home/matt/bin/xv1 .' vnallbgCommand 'cd /usr/local/bin; CP /d/vnfe1/home/matt/bin/xvn .' ############################################################ Sun Jun 23 09:46:49 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-06-23-0946 # 2715 2715 51585 # laplace:/usr2/people/matt/system/vnArchive # 2715 2715 51585 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:06' | pre CP | post '../Rtop.2002.06' | csh cd /home/matt/system/vnshadow/Rtop.2002.06 vnAccount -p 25 RM -rf ../vnAccount.02.06 mkdir ../vnAccount.02.06 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.06 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.06 . ############################################################ Mon Jun 24 17:10:56 PDT 2002 ############################################################ (1) New Account for Suresh Sivanandam cd vn Arc README* vi README.USERS nu cat<suresh suresh:x:9063:9000:Suresh Sivanandam:/d/vnfe1/home/suresh:/bin/tcsh END vnNewUsers suresh # Get password from physics.ubc.ca etc; sola vs # 6j.xmqwthYPb. 
vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~suresh; echo suresh@physics.ubc.ca > .forward; chown suresh.other .forward; ls -al; cat .forward' ############################################################ Wed Jun 26 12:14:40 PDT 2002 ############################################################ (1) New Account for Andrew Peters cd vn Arc README* vi README.USERS nu cat<peters peters:x:9064:9000:Andrew Peters:/d/vnfe1/home/peters:/bin/tcsh END vnNewUsers peters etc; sola vs # .6lLkLmwDNRDQ vnDistEtc shadow # Set .forward ssh root@vnfe1 'cd ~peters; echo peters@zoology.ubc.ca > .forward; chown peters.other .forward; ls -al; cat .forward' ############################################################ Sat Jun 29 07:17:50 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-06-29-0718 # 1149 1149 21831 # laplace:/usr2/people/matt/system/vnArchive # 1149 1149 21831 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:06' | pre CP | post '../Rtop.2002.06' | csh cd /home/matt/system/vnshadow/Rtop.2002.06 vnAccount -p 25 RM -rf ../vnAccount.02.06 mkdir ../vnAccount.02.06 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.06 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.06 . 
############################################################ Wed Jul 3 18:10:00 PDT 2002 ############################################################ (1) Upgrading Openssh to 3.4p1, also upgraded zlib.a to 1.1.4 See /d/laplace/usr2/people/matt/system/LINUX/README vnallbgCommand 'at -f /d/vnfe1/home/matt/system/vn/scripts/restart-ssh 3AM July 5' ############################################################ Sat Jul 6 09:52:58 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-07-06-0953 # 1427 1427 27113 # laplace:/usr2/people/matt/system/vnArchive # 1427 1427 27113 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:06' | pre CP | post '../Rtop.2002.06' | csh mkdir ../Rtop.2002.07 LS | grep '^2002:07' | pre CP | post '../Rtop.2002.07' | csh cd /home/matt/system/vnshadow/Rtop.2002.06 vnAccount -p 20 RM -rf ../vnAccount.02.06 mkdir ../vnAccount.02.06 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002.07 vnAccount -p 20 mkdir ../vnAccount.02.07 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.06 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.06 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.07 . 
############################################################ Sat Jul 13 09:20:04 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-07-13-0921 # 1167 1167 22173 # laplace:/usr2/people/matt/system/vnArchive # 1167 1167 22173 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:07' | pre CP | post '../Rtop.2002.07' | csh cd /home/matt/system/vnshadow/Rtop.2002.07 vnAccount -p 20 RM -rf ../vnAccount.02.07 mkdir ../vnAccount.02.07 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.07 . ############################################################ Tue Jul 16 13:37:28 PDT 2002 ############################################################ (1) New Account for Pal Sandhu cd vn vi README.USERS nu cat<psandhu psandhu:x:11812:600:Palbinder Sandhu:/d/vnfe1/home/psandhu:/bin/tcsh END vnNewUsers psandhu etc; sola vs # cC.n8HPpLEVdQ vnDistEtc shadow ssh root@vnfe1 'cd ~psandhu; echo psandhu@physics.ubc.ca > .forward; chown psandhu.choptuik .forward; ls -al; cat .forward' ############################################################ Wed Jul 17 07:25:46 PDT 2002 ############################################################ vn.physics.ubc.ca Compute Node Status: Wed Jul 17 07:15:00 PDT 2002 The following nodes are down: 1: vn2 down 11:55 Need to remove #cooperon from /etc/fstab etc scp root@vn1:/etc/fstab fstab.node # Commented-out cooperon mount foreach i (`iota 64`) scp fstab.node root@vn${i}:/etc/fstab & end vnallCommand 'grep coop /etc/fstab' #OK 
############################################################ Tue Jul 23 07:34:21 PDT 2002 ############################################################ (1) Installing xforms headers on vn machines /usr/include/forms.h cp bxform-089-glibc2.1.tar.gz /d/vnfe1/home/matt/autoconf vnallbgCommand 'cdi; tar zxf /d/vnfe1/home/matt/autoconf/bxform-089-glibc2.1.tar.gz; cd bxform-089-glibc2.1; make empty; (cd FORMS; touch *.c; make); make install' libGLU ... missing, so no applications build vnallbgCommand 'ln -s /usr/X11R6/lib/libMesaGL.so /usr/X11R6/lib/libGL.so' vnallbgCommand 'ln -s /usr/X11R6/lib/libMesaGL.so /usr/lib/libGL.so' vnallbgCommand 'ln -s /usr/X11R6/lib/libMesaGLU.so /usr/X11R6/lib/libGLU.so' vnallbgCommand 'ln -s /usr/X11R6/lib/libMesaGLU.so /usr/lib/libGLU.so' vnallbgCommand 'cdi; Installz vutil' vnallbgCommand 'cdi; Installz utilio' vnallbgCommand 'cdi; Installz xvs' vnallbgCommand 'cdi; Installz rnpl' ############################################################ Tue Jul 23 18:01:38 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-07-23-1809 # 1834 1834 34846 # laplace:/usr2/people/matt/system/vnArchive # 1834 1834 34846 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:07' | pre CP | post '../Rtop.2002.07' | csh cd /home/matt/system/vnshadow/Rtop.2002.07 vnAccount -p 20 RM -rf ../vnAccount.02.07 mkdir ../vnAccount.02.07 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.07 . 
############################################################ Thu Jul 25 16:58:06 PDT 2002 ############################################################ (1) sshd needs to be re-started on most machines at -f /d/vnfe1/home/matt/system/vn/scripts/restart-ssh now vnN -> vn48 vn49 vn50 vnNbgCommand 'at -f /d/vnfe1/home/matt/system/vn/scripts/restart-ssh now' OK vnallbgCommand 'at -f /d/vnfe1/home/matt/system/vn/scripts/restart-ssh now' ############################################################ Mon Jul 29 20:25:06 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-07-29-2025 # 825 825 15675 # laplace:/usr2/people/matt/system/vnArchive # 825 825 15675 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:07' | pre CP | post '../Rtop.2002.07' | csh cd /home/matt/system/vnshadow/Rtop.2002.07 vnAccount -p 25 RM -rf ../vnAccount.02.07 mkdir ../vnAccount.02.07 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.07 . 
############################################################ Thu Aug 1 16:34:50 PDT 2002 ############################################################ (1) New Account for Magdalena Rucker cd vn vi README.USERS nu cat<rucker rucker:x:9065:9000:Magdalena Rucker:/d/vnfe1/home/rucker:/bin/tcsh END vnNewUsers rucker etc; sola vs # hY/Ux8K6EkPhs vnDistEtc shadow ssh root@vnfe1 'cd ~rucker; echo rucker@eos.ubc.ca > .forward; chown rucker.other .forward; ls -al; cat .forward' ############################################################ Tue Aug 6 12:19:04 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-08-06-1219 # 1347 1347 25593 # laplace:/usr2/people/matt/system/vnArchive # 1347 1347 25593 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:07' | pre CP | post '../Rtop.2002.07' | csh mkdir ../Rtop.2002.08 LS | grep '^2002:08' | pre CP | post '../Rtop.2002.08' | csh cd /home/matt/system/vnshadow/Rtop.2002.07 vnAccount -p 25 RM -rf ../vnAccount.02.07 mkdir ../vnAccount.02.07 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002.08 vnAccount -p 25 mkdir ../vnAccount.02.08 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.07 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.07 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.08 . 
############################################################ Tue Aug 6 16:43:00 PDT 2002 ############################################################ (1) New Account for Daniel Green cd vn vi README.USERS nu cat<dgreen dgreen:x:9066:9000:Daniel Green:/d/vnfe1/home/dgreen:/bin/tcsh END vnNewUsers dgreen etc; sola vs # Got shadow entry from physics.ubc.ca (drgreen) # iHsp4KQrwuD2U vnDistEtc shadow ssh root@vnfe1 'cd ~dgreen; echo dangreen@telus.net > .forward; chown dgreen.other .forward; ls -al; cat .forward' ############################################################ Wed Aug 14 14:01:25 PDT 2002 ############################################################ (1) Archiving Rtop files cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-08-14-1402 # 1472 1472 27968 # laplace:/usr2/people/matt/system/vnArchive # 1472 1472 27968 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:08' | pre CP | post '../Rtop.2002.08' | csh cd /home/matt/system/vnshadow/Rtop.2002.08 vnAccount -p 30 RM -rf ../vnAccount.02.08 mkdir ../vnAccount.02.08 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.08 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.08 . ############################################################ Thu Aug 15 11:58:44 PDT 2002 ############################################################ (1) Modified matt@vnfe1:~/scripts/Rtop cat $OUT | sed 's?R N?R_N?' | sed 's?S N?S_N?' | sort -rn +10 | sed "s/^\(vn[1-9] \)/\1 /" | nnumber >> /tmp/TOP ############################################################ Tue Aug 20 11:13:06 PDT 2002 ############################################################ (1) New Account for Arman Rahmim ... 
already exists! # Clean up old, create new # As root@vnfe1 cd /d/vnfe1/home mv rahmim rahmim.O # As choptuik@physics sudo pwentry rahmim rahmim:x:204:20:Arman Rahmim:/home/rahmim:/usr/local/bin/tcsh rahmim:ydXx0GI8BtCGM:11310::::::-1 cd vn vi README.USERS nu cat<rahmim rahmim:x:204:9000:Arman Rahmim:/d/vnfe1/home/rahmim:/bin/tcsh END vnNewUsers rahmim etc; sola vs # ydXx0GI8BtCGM vnDistEtc shadow ssh root@vnfe1 'cd ~rahmim; echo rahmim@physics.ubc.ca > .forward; chown rahmim.other .forward; ls -al; cat .forward' # Send message informing Arman that account is ready, but that he already # had one ############################################################ Wed Aug 21 14:14:05 PDT 2002 ############################################################ (1) Setting up automount for PET group etc scp root@vnfe1:/etc/auto.misc . vnDistEtc /etc/auto.misc vnallbgCommand '/etc/rc.d/init.d/autofs start' vnallbgCommand '/sbin/chkconfig autofs on' ############################################################ Wed Aug 21 15:18:31 PDT 2002 ############################################################ (1) Hacked on 'vnAccount' to take into account clock rollover "feature". ############################################################ Wed Aug 14 14:01:25 PDT 2002 ############################################################ (1) Archiving Rtop files, first run after modifying vnAccount to disable cpu *= 2 if CPU% > 60. cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-08-23-1833 # 1686 1686 32034 # laplace:/usr2/people/matt/system/vnArchive # 1686 1686 32034 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:08' | pre CP | post '../Rtop.2002.08' | csh cd /home/matt/system/vnshadow/Rtop.2002.08 vnAccount -p 30 RM -rf ../vnAccount.02.08 mkdir ../vnAccount.02.08 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.08 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.08 . ############################################################ Tue Aug 27 14:55:01 PDT 2002 ############################################################ (1) New Account for Brad Davis (UBC Zoology GS, Whitlock supervisor) cd vn vi README.USERS nu cat<davis davis:x:9067:9000:Bradley Davis:/d/vnfe1/home/davis:/bin/bash END vnNewUsers davis etc; vs # Set password manually vnDistEtc shadow ssh root@vnfe1 'cd ~davis; echo davis@zoology.ubc.ca > .forward; chown davis.other .forward; ls -al; cat .forward' ############################################################ Tue Aug 27 17:36:39 PDT 2002 ############################################################ (1) New Account for Monojoy Goswami (UBC Chem PDF, Patey supervisor) cd vn vi README.USERS nu cat<monojoy monojoy:partha10:1254:1200:Monojoy Goswami:/d/vnfe2/home/monojoy:/bin/tcsh END vnNewUsers monojoy ssh root@vnfe1 'cd ~monojoy; echo monojoy@chem.ubc.ca > .forward; chown monojoy.patey .forward; ls -al; cat .forward' ############################################################ Tue Aug 27 17:41:32 PDT 2002 ############################################################ (1) rwhod dying on some nodes? 
[matt@vnfe1 ~monojoy]$ down vn1 down 2:58 vn10 down 3:04 # As root@vn{1,10} rwhod ############################################################ Thu Aug 29 07:04:31 PDT 2002 ############################################################ (1) rwhod not running on vn1 again # As root@vn1 rwhod ############################################################ Thu Aug 29 16:31:20 PDT 2002 ############################################################ (1) Sune Jespersen reports trouble allocating more than 95 Mbyte vnCommand 'cd ~matt/debug/tmem; ./Tmem 210000000' # fails on vn5, vn10, vn33, vn43 # Cleaning up rahmim's dead jobs vnallbgCommand 'ps -elf | grep rahmim | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Sun Sep 1 08:37:26 PDT 2002 ############################################################ (1) Archiving Rtop files, second run after modifying vnAccount to disable cpu *= 2 if CPU% > 60. cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-09-01-0837 # 1320 1320 25080 # laplace:/usr2/people/matt/system/vnArchive cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 LS | grep '^2002:08' | pre CP | post '../Rtop.2002.08' | csh cd /home/matt/system/vnshadow/Rtop.2002.08 vnAccount -p 30 RM -rf ../vnAccount.02.08 mkdir ../vnAccount.02.08 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.08 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.08 . 
############################################################ Tue Sep 3 13:17:19 PDT 2002 ############################################################ (1) New Account for Bruce Ainslie (UBC EOS GS, Steyn supervisor) cd vn vi README.USERS nu cat<bainslie bainslie:x:9068:9000:Bruce Ainslie:/d/vnfe1/home/bainslie:/bin/csh END vnNewUsers bainslie etc sola; vs # du2cpijHpWSDg vnDistEtc shadow ssh root@vnfe1 'cd ~bainslie; echo bainslie@eos.ubc.ca > .forward; chown bainslie.other .forward; ls -al; cat .forward' ############################################################ Tue Sep 3 20:40:32 PDT 2002 ############################################################ (1) Archiving Rtop files, third run after modifying vnAccount to disable cpu *= 2 if CPU% > 60. cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-09-03-2040 # 436 436 8284 # laplace:/usr2/people/matt/system/vnArchive # 436 436 8284 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.09 LS | grep '^2002:09' | pre CP | post '../Rtop.2002.09' | csh cd /home/matt/system/vnshadow/Rtop.2002.09 # overduin's usage very heavy of late vnAccount -p 45 mkdir ../vnAccount.02.09 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.09 . 
############################################################ Fri Sep 6 20:32:36 PDT 2002 ############################################################ (1) New Account for Ingrid Stairs istairs:x:9083:20:Ingrid Stairs:/home/istairs:/usr/local/bin/tcsh istairs:ovcAsDTgKxrfg:11933:::::: cd vn vi README.USERS nu cat<istairs istairs:x:9083:9000:Ingrid Stairs:/d/vnfe1/home/istairs:/bin/tcsh END vnNewUsers istairs etc sola; vs # ovcAsDTgKxrfg vnDistEtc shadow ssh root@vnfe1 'cd ~istairs; echo stairs@physics.ubc.ca > .forward; chown istairs.other .forward; ls -al; cat .forward' ############################################################ Sun Sep 8 16:55:27 PDT 2002 ############################################################ (1) New Account for Hiranya Vajramani Peiris, Princeton GS working on analysis of MAP (?) data---via Mark Halpern cd vn vi README.USERS nu cat<hiranya hiranya:x:9069:9000:Hiranya Vajramani Peiris:/d/vnfe1/home/hiranya:/bin/bash END vnNewUsers hiranya etc sola; vs TODO # TF2buJdlWc.6g vnDistEtc shadow ssh root@vnfe1 'cd ~hiranya; echo hiranya@astro.princeton.edu > .forward; chown hiranya.other .forward; ls -al; cat .forward' ############################################################ Mon Sep 9 17:52:52 PDT 2002 ############################################################ (1) Group for Halpern etc make import vi group # Added halpern::9100: vnDistEtc group # As root@vnfe1 cd /home chgrp -R halpern halpern chgrp -R halpern hiranya ############################################################ Mon Sep 9 17:56:31 PDT 2002 ############################################################ (1) New Account for Licia Verde, Peiris, Princeton GS working on analysis of MAP (?) 
data---via Mark Halpern cd vn vi README.USERS nu cat<lverde lverde:x:9070:9100:Licia Verde:/d/vnfe1/home/lverde:/bin/tcsh END vnNewUsers lverde etc sola; vs TODO # vnDistEtc shadow ssh root@vnfe1 'chmod -R og-rwx /home/lverde' ssh root@vnfe1 'cd ~lverde; echo lverde@astro.princeton.edu > .forward; chown lverde.halpern .forward; ls -al; cat .forward' ############################################################ Tue Sep 10 09:27:50 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-09-10-0928 # 1168 1168 22192 # laplace:/usr2/people/matt/system/vnArchive # 1168 1168 22192 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.09 mkdir ../Rtop.2002.09 LS | grep '^2002:09' | pre CP | post '../Rtop.2002.09' | csh cd /home/matt/system/vnshadow/Rtop.2002.09 vnAccount -p 45 RM -r ../vnAccount.02.09 mkdir ../vnAccount.02.09 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.09 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.09 . ############################################################ Thu Sep 12 07:55:27 PDT 2002 ############################################################ (1) R. 
Mar filling up /d/vnfe1 quickly vnallbgCommand 'ps -elf | grep rmar | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Thu Sep 12 20:45:46 PDT 2002 ############################################################ vn27 down 0:29 # Will handle in AM # See README.CRASH (CRASH_114) ############################################################ Fri Sep 13 17:59:14 PDT 2002 ############################################################ (1) New Account for Colin Blair, Bushe grad student 1616 cblair # cd vn vi README.USERS nu cat<cblair cblair:x:1616:1600:Colin Blair:/d/vnfe3/home/cblair:/bin/bash END vnNewUsers cblair TODO etc sola; vs # jMPKbWIdHcnuU vnDistEtc shadow ssh root@vnfe3 'cd ~cblair; echo cblair@mech.ubc.ca > .forward; chown cblair.bushe .forward; ls -al; cat .forward' ############################################################ Mon Sep 16 20:51:28 PDT 2002 ############################################################ (1) Rtop hangs up on 'ssh vn27' since home directory is on vnfe1, as per Scott N's suggestion, change idle's home directory to /d/vnfe2/home/idle etc Arc passwd vi passwd vnDistEtc passwd ############################################################ Tue Sep 17 20:36:18 PDT 2002 ############################################################ (1) Adding Ingrid's disks to automount configuration etc vi auto.misc # Added psr1 -rw,hard,intr ariel.astro.ubc.ca:/export/psr1 ASTRO -ro,hard,intr ariel.astro.ubc.ca:/ASTRO vnDistEtc auto.misc vnallbgCommand '/etc/rc.d/init.d/autofs start' ############################################################ Wed Sep 18 16:21:51 PDT 2002 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-09-18-1622 # 1338 1338 25422 # laplace:/usr2/people/matt/system/vnArchive # 1338 1338 25422 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.09 mkdir ../Rtop.2002.09 LS | grep '^2002:09' | pre CP | post '../Rtop.2002.09' | csh cd /home/matt/system/vnshadow/Rtop.2002.09 vnAccount -p 45 RM -r ../vnAccount.02.09 mkdir ../vnAccount.02.09 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.09 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.09 . ############################################################ Tue Sep 24 18:03:10 PDT 2002 ############################################################ (1) NFS mounting bh7:/home2 on cluster vnallbgCommand 'mkdir -p /d/bh7/home2' vnallbgCommand 'echo bh7:/home2 /d/bh7/home2 nfs rw,bg,hard,intr 0 0 >> /etc/fstab' vnallbgCommand 'mount bh7:/home2' vnallCommand 'df | grep bh7' ############################################################ Tue Sep 24 20:41:19 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-09-24-2041 # 1145 1145 21755 # laplace:/usr2/people/matt/system/vnArchive # 1145 1145 21755 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.09 mkdir ../Rtop.2002.09 LS | grep '^2002:09' | pre CP | post '../Rtop.2002.09' | csh cd /home/matt/system/vnshadow/Rtop.2002.09 vnAccount -p 45 RM -r ../vnAccount.02.09 mkdir ../vnAccount.02.09 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.09 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.09 . ############################################################ Thu Sep 26 18:11:57 PDT 2002 ############################################################ (1) New Account for Richard Ree (Ag Sci PDF, Cronk super) 9071 rree # Richard Ree (UBC Ag Sci GS, Cronk) cd vn vi README.USERS nu cat<rree rree:x:9071:9000:Richard Ree:/d/vnfe1/home/rree:/bin/bash END vnNewUsers rree TODO etc sola; vs # $1$wDAElqAF$qYcyV5k1NdQhDOErOJwxR1 vnDistEtc shadow ssh root@vnfe1 'cd ~rree; echo ree@interchange.ubc.ca > .forward; chown ree.other .forward; ls -al; cat .forward' ############################################################ Mon Sep 30 14:54:57 PDT 2002 ############################################################ From scn@warp.physics.ubc.ca Mon Sep 30 14:40:01 2002 Hey Matt, I couldn't ssh into vn43, but I could telnet in. when I did, sshd was not running and so I started it. Now, it's working fine. I'm not sure why sshd went down, however I noticed a few things strange in the logs. Using 'last', one sees that the last login (before sshd crashed) was for 'idle' at: idle pts/0 vnfe1.physics.ub Sun Sep 29 06:03 - 06:03 (00:00) A few minutes before this last login, there were a few messages like the following: Sep 29 05:55:37 vn43 kernel: swap_duplicate: entry 10000000, offset exceeds max Sep 29 05:55:37 vn43 kernel: VM: killing process sh Sep 29 05:55:37 vn43 kernel: swap_free: offset exceeds max Sep 29 05:55:37 vn43 last message repeated 14 times And today: Sep 29 10:40:10 vn43 -- MARK -- Sep 29 11:00:10 vn43 -- MARK -- ... 
Sep 30 14:00:14 vn43 -- MARK -- Sep 30 14:20:14 vn43 -- MARK -- Sep 30 14:22:30 vn43 PAM_pwdb[18214]: (login) session opened for user scn by (uid=0) The first instance of this "MARK" log message is in /var/log/messages.1 : Sep 25 20:40:00 vn43 -- MARK -- What do you make of all this? I'm guessing the swap problem caused sshd to crash, but I'm not sure what "caused" the "MARK" log entries. scott n. ############################################################ Tue Oct 1 08:26:42 PDT 2002 ############################################################ vn43 down 11:39 # see README.CRASH (CRASH_115) # Note: this is the 7th time vn43 has crashed. Perhaps should # swap out memory ############################################################ Tue Oct 1 09:03:47 PDT 2002 ############################################################ No reply from tzenova re jobs ... kill them vnallbgCommand 'ps -elf | grep tzenova | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Tue Oct 1 09:34:07 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-10-01-0934 # 964 964 18316 # laplace:/usr2/people/matt/system/vnArchive # 964 964 18316 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.09 mkdir ../Rtop.2002.09 LS | grep '^2002:09' | pre CP | post '../Rtop.2002.09' | csh cd /home/matt/system/vnshadow/Rtop.2002.09 vnAccount -p 30 RM -r ../vnAccount.02.09 mkdir ../vnAccount.02.09 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.09 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.09 . 
############################################################ Wed Oct 2 17:59:17 PDT 2002 ############################################################ (1) Scott replaced vn43's memory ############################################################ Thu Oct 3 15:02:58 PDT 2002 ############################################################ (1) Deleting minghe's account as per his request etc # Changed passwd, shadow vnDistEtc passwd shadow # As root@vnfe1 cd /home RM -rf minghe ############################################################ Sat Oct 5 17:37:28 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-10-05-1738 # 792 792 15048 # laplace:/usr2/people/matt/system/vnArchive # 792 792 15048 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.10 LS | grep '^2002:10' | pre CP | post '../Rtop.2002.10' | csh cd /home/matt/system/vnshadow/Rtop.2002.10 vnAccount -p 35 mkdir ../vnAccount.02.10 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.10 . 
############################################################ Wed Oct 9 18:01:00 PDT 2002 ############################################################ (1) Magdalena reports mkdir: cannot create directory `/tmp/rucker': No space left on device vnCommand > /tmp/DFDF /9[0-9]% vn10 97% (overduin, plischke) 5099062 fransp 2831342 luisl vn13 92% (plischke x 2) 3543376 luisl 2569767 fransp 2284429 scn vn33 94% (plischke/rucker) 6980664 fransp 832163 luisl vn39 100% (rucker) 8007273 fransp 1660887 scn vn62 92% (monojoy) 5041617 luisl 2720087 fransp ############################################################ Thu Oct 10 15:17:00 PDT 2002 ############################################################ Zapping suresh's defunct mpi jobs vnallbgCommand 'ps -elf | grep suresh | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Sat Oct 12 09:12:56 PDT 2002 ############################################################ (1) Changed DNS default to hub.ubc.ca search physics.ubc.ca nameserver 137.82.1.1 nameserver 137.82.28.3 nameserver 142.103.236.1 ############################################################ Mon Oct 14 09:24:59 PDT 2002 ############################################################ vnallbgCommand '/etc/rc.d/init.d/mysql stop' vnallbgCommand '/etc/rc.d/init.d/postgresql stop' vnallbgCommand 'chkconfig postgresql off' ############################################################ Tue Oct 15 15:40:15 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-10-15-1540 # 1043 1043 19817 # laplace:/usr2/people/matt/system/vnArchive # 1043 1043 19817 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd ..
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.10 mkdir ../Rtop.2002.10 LS | grep '^2002:10' | pre CP | post '../Rtop.2002.10' | csh cd /home/matt/system/vnshadow/Rtop.2002.10 vnAccount -p 35 RM -r ../vnAccount.02.10 mkdir ../vnAccount.02.10 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.10 . ############################################################ Thu Oct 17 16:29:04 PDT 2002 ############################################################ (1) New Account for Murray McCutcheon # From physics murray:x:577:9000:Murray McCutcheon:/home/murray:/usr/local/bin/tcsh murray:yo6ynCd6qJYiY:11933:::::: cd vn vi README.USERS nu cat<murray murray:x:577:20000:Murray McCutcheon:/d/vnfe1/home/murray:/bin/tcsh END vnNewUsers murray TODO etc sola; vs # yo6ynCd6qJYiY vnDistEtc shadow ssh root@vnfe1 'cd ~murray; echo murray@physics.ubc.ca > .forward; chown murray.other .forward; ls -al; cat .forward' ############################################################ Mon Oct 21 11:50:43 PDT 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-10-21-1150 # 1030 1030 19570 # laplace:/usr2/people/matt/system/vnArchive cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.10 mkdir ../Rtop.2002.10 LS | grep '^2002:10' | pre CP | post '../Rtop.2002.10' | csh cd /home/matt/system/vnshadow/Rtop.2002.10 vnAccount -p 35 RM -r ../vnAccount.02.10 mkdir ../vnAccount.02.10 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.10 . ############################################################ Fri Oct 25 16:55:44 PDT 2002 ############################################################ (1) New Account for Reza Ejtehadi (ejtehadi) # From physics ejtehadi:x:601:20:Reza Ejtehadi:/home/ejtehadi:/usr/local/bin/tcsh ejtehadi:0duDwcBD0G83A:11970:::::: cd vn vi README.USERS 601 ejtehadi # Reza Ejtehadi (UBC P&A PD, Plotkin) nu cat<ejtehadi ejtehadi:x:601:9000:Reza Ejtehadi:/d/vnfe1/home/ejtehadi:/bin/tcsh END vnNewUsers ejtehadi telnet vnfe1 telnet vn35 TODO etc sola; vs # 0duDwcBD0G83A vnDistEtc shadow ssh root@vnfe1 'cd ~ejtehadi; echo ejtehadi@physics.ubc.ca > .forward; chown ejtehadi.other .forward; ls -al; cat .forward' ############################################################ Mon Oct 28 14:35:49 PST 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-10-28-1435 # 779 779 14801 # laplace:/usr2/people/matt/system/vnArchive # 779 779 14801 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.10 mkdir ../Rtop.2002.10 LS | grep '^2002:10' | pre CP | post '../Rtop.2002.10' | csh cd /home/matt/system/vnshadow/Rtop.2002.10 vnAccount -p 35 RM -r ../vnAccount.02.10 mkdir ../vnAccount.02.10 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.10 . ############################################################ Tue Oct 29 13:24:52 PST 2002 ############################################################ (1) vnfe1 hung at about 12 Noon (see README.CRASH CRASH_116) Problems with df on vn16 vn39 vn42 vn45 vn52 vn54 (2) vn24 also apparently hung (see README.CRASH CRASH_117) vnallbgCommand 'ps -elf | grep chichen | grep -v grep | nth 4 | pre kill -9 | csh' ############################################################ Thu Oct 31 07:25:47 PST 2002 ############################################################ vn40 down 3:27 see README.CRASH CRASH_118 ############################################################ Sat Nov 2 07:34:09 PST 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-11-02-0734 # 823 823 15637 # laplace:/usr2/people/matt/system/vnArchive # 823 823 15637 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -r ../Rtop.2002.10 mkdir ../Rtop.2002.10 LS | grep '^2002:10' | pre CP | post '../Rtop.2002.10' | csh cd /home/matt/system/vnshadow/Rtop.2002.10 vnAccount -p 35 RM -r ../vnAccount.02.10 mkdir ../vnAccount.02.10 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -r vnAccount.02.10 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.10 . ############################################################ Tue Nov 5 09:32:19 PST 2002 ############################################################ (1) Removing root's crontab on nodes vnCommand 'crontab -r' (2) Cleaning up /tmp/DATE files vnallbgCommand 'test -f /tmp/DATE && /bin/rm -f /tmp/DATE' ############################################################ Thu Nov 7 14:23:05 PST 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-11-07-1423 # 967 967 18373 # laplace:/usr2/people/matt/system/vnArchive # 967 967 18373 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 mkdir ../Rtop.2002.11 LS | grep '^2002:11' | pre CP | post '../Rtop.2002.11' | csh cd /home/matt/system/vnshadow/Rtop.2002.11 vnAccount -p 30 mkdir ../vnAccount.02.11 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.11 . 
############################################################ # NEGATIVE USAGE TIME BUG IN vnAccount NEEDS TO BE RESOLVED ############################################################ ############################################################ Tue Nov 12 17:03:12 PST 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-11-12-1704 # 946 946 17974 # laplace:/usr2/people/matt/system/vnArchive # 946 946 17974 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -rf ../Rtop.2002.11 mkdir ../Rtop.2002.11 LS | grep '^2002:11' | pre CP | post '../Rtop.2002.11' | csh cd /home/matt/system/vnshadow/Rtop.2002.11 vnAccount -p 30 RM -rf ../vnAccount.02.11 mkdir ../vnAccount.02.11 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.11 scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.11 . ############################################################ Fri Nov 15 14:54:05 PST 2002 ############################################################ (1) Tentative fix for vnAccount bug, maintain explicit info. concerning which nodes have been up long enough to negate PS bug. Currently, will need to modify script as nodes crash, roll over ... Apparently there is *another* bug in PS/TOP which causes run times to be *doubled* (rather than halved) after about 320 days. Will be challenging, to say the least, to fix the script. For time being, will simply stop posting statistics. 
############################################################ Mon Nov 18 18:15:37 PST 2002 ############################################################ (1) Eric Nodwell filled up vnfe1:/home Killed all his jobs and recovered about 3.6 Gbyte ############################################################ Wed Nov 20 17:00:16 PST 2002 ############################################################ (1) New Account for Daniel Promislow (promislow) vi README.USERS 9072 promislow # Daniel Promislow (Visiting Prof., Otto, Zoology) nu cat<promislow promislow:x:9072:9000:Daniel Promislow:/d/vnfe1/home/promislow:/bin/tcsh END vnNewUsers promislow telnet vnfe1 telnet vn35 TODO etc sola; vs # gBqUcTBRYB3ug vnDistEtc shadow ssh root@vnfe1 'cd ~promislow; echo promislo@zoology.ubc.ca > .forward; chown promislo.other .forward; ls -al; cat .forward' ############################################################ Thu Nov 28 19:26:30 PST 2002 ############################################################ vn34 down 1:06 Machine un-pingable Frans / Ingrid running 2002:11:28:1819.40:36: vn34 366 fransp 18 2 463M 445M 2236 R_N \ 0 98.5 88.1 6961m graxi_ad 2002:11:28:1819.40:111: vn34 1586 istairs 14 2 488 488 420 R_N \ 0 59.8 0.0 0:16 fold see README.CRASH CRASH_119 ############################################################ Thu Nov 28 19:31:58 PST 2002 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-11-28-1932 # 2818 2818 53542 # laplace:/usr2/people/matt/system/vnArchive # 2818 2818 53542 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -rf ../Rtop.2002.11 mkdir ../Rtop.2002.11 LS | grep '^2002:11' | pre CP | post '../Rtop.2002.11' | csh ############################################################ Tue Dec 3 09:13:18 PST 2002 ############################################################ vn47 down 2:14 Machine un-pingable Frans / Sarah Overduin running 2002:12:03:0654.26:33: vn47 29826 fransp 11 2 464M 455M 2004 R_N 0 99.9 90.0 5517m graxi_ad_V 2002:12:03:0654.26:34: vn47 10981 overduin 14 2 13120 11M 264 R_N 0 99.9 2.3 25739m ABCwalls.e see README.CRASH CRASH_120 ############################################################ Sat Dec 7 10:34:46 PST 2002 ############################################################ (1) Cleaning up hiranya/murray's residual MPI jobs vnallbgCommand 'ps -elf | grep hiranya | grep -v grep | nth 4 | pre kill -9 | csh' vnallbgCommand 'ps -elf | grep murray | grep -v grep | nth 4 | pre kill -9 | csh' vnCommand 'ps -elf | grep hiranya | grep -v grep' > /tmp/HIRANYA & vnCommand 'ps -elf | grep murray | grep -v grep' > /tmp/MURRAY & ############################################################ Mon Dec 9 11:13:31 PST 2002 ############################################################ (1) Setting cvs's password to match that for Eric H on laplace ############################################################ Wed Dec 11 08:49:49 PST 2002 ############################################################ (1) Moving suresh, jfn from vnfe1:/home to vnfe1:/home2 ############################################################ Thu Nov 28 19:31:58 PST 2002 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2002-12-16-1718 # 3066 3066 58254 # laplace:/usr2/people/matt/system/vnArchive # 3066 3066 58254 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. cd .. rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -rf ../Rtop.2002.11 mkdir ../Rtop.2002.11 LS | grep '^2002:11' | pre CP | post '../Rtop.2002.11' | csh mkdir ../Rtop.2002.12 LS | grep '^2002:12' | pre CP | post '../Rtop.2002.12' | csh ############################################################ Mon Dec 30 08:42:30 PST 2002 ############################################################ Scott reports problems getting into vn14, vn15 See README.CRASH CRASH_121, CRASH_122 sjespers apparently running on both Login: sjespers Name: Sune Norhoj Jespersen Directory: /d/vnfe1/home/sjespers Shell: /bin/bash Never logged in. Mail forwarded to sjespers@sfu.ca RESTORE vnN, vnDistEtc motd ############################################################ Fri Jan 10 08:57:46 PST 2003 ############################################################ Inaki reports problem with sm on vnfe1, binary is different than that on bh, lnx machines. Not sure where/how Scott installed it from but will re-install vnallbgCommand 'cd /usr/local/bin; scp root@bh0.physics.ubc.ca:/usr/local/bin/sm .' ssh_exchange_identification: Connection closed by remote host >>> Executing as root@142.103.237.52 -rwxr-xr-x 1 root root 1078113 Jan 3 2002 /usr/local/bin/sm* Fixed manually. Appears to have fixed problem. 
############################################################ Fri Jan 10 09:33:36 PST 2003 ############################################################ (1) New account for Aviv Keshet, undergrad working with Marziali From physics sudo pwenrty akeshet:x:12770:400:Aviv Keshet:/home/ugrad/akeshet:/bin/tcsh akeshet:yC0WJLilRSHGY:11578::::::-1 From: Aviv Keshet To: Matt Choptuik Subject: Request for vn account 1) Full Name: Aviv Keshet 2) Preferred Login Name: akeshet 3) Alternate Login Name: aviv 4) Group: Other 5) Preferred e-mail: akeshet@interchange.ubc.ca 6) Contact Phone Number: (604) 822-4514 7) Preferred Shell: bash 8) Title: I am a undergrad co-op student working in Dr. Marziali's lab. I am continuing the work of Dan Green, who used this cluster for computing simulations of polymers translocating through a pore. Thanks, - Aviv Keshet vi README.USERS 12770 akeshet # Aviv Keshet (UBC PHAS UG, Marziali) nu cat<akeshet akeshet:x:12770:9000:Aviv Keshet:/d/vnfe1/home/akeshet:/bin/bash END vnNewUsers akeshet telnet vnfe1 telnet vn35 etc sola; vs # yC0WJLilRSHGY vnDistEtc shadow ssh root@vnfe1 'cd ~akeshet; echo akeshet@interchange.ubc.ca > .forward; chown akeshet.other .forward; ls -al; cat .forward' ############################################################ Fri Jan 10 09:50:39 PST 2003 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-01-10-0950 # 2912 2912 55328 # laplace:/usr2/people/matt/system/vnArchive # 2912 2912 55328 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} MV 2002* .. MV 2003* .. cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2002 RM -rf ../Rtop.2002.12 mkdir ../Rtop.2002.12 LS | grep '^2002:12' | pre CP | post '../Rtop.2002.12' | csh mkdir ../Rtop.2003.01 LS | grep '^2003:01' | pre CP | post '../Rtop.2003.01' | csh mkdir ../Rtop.2003 LS | grep '^2003:01' | pre MV | post '../Rtop.2003' | csh ############################################################ Fri Feb 7 14:58:48 PST 2003 ############################################################ (1) New account for Stefan Avall, undergrad working with Plotkin From physics sudo pwenrty savall:x:11536:400:Stefan Per Avall:/home2/savall:/bin/tcsh savall:Avj5xxUZRMFa6:11939::::::-1 From s_avall@yahoo.ca Wed Feb 5 14:04:39 2003 To: choptuik@physics.ubc.ca 1. Stefan Avall 2. savall 3. stefan 4. Other 5. s_avall@yahoo.ca 6. 604-318-9504 7. bash 8. I am an undergraduate doing my honours thesis in Steven Plotkins group. We will be using the cluster to do Molecular Dynamics Simulations. vi README.USERS 11536 savall # Stefan Avall (UBC PHAS UG, Plotkin) nu cat<savall savall:x:11536:9000:Stefan Avall:/d/vnfe1/home/savall:/bin/bash END vnNewUsers savall telnet vnfe1 telnet vn35 etc sola; vs # Avj5xxUZRMFa6 vnDistEtc shadow ssh root@vnfe1 'cd ~savall; echo s_avall@yahoo.ca > .forward; chown savall.other .forward; ls -al; cat .forward' ############################################################ Wed Feb 12 15:45:17 PST 2003 ############################################################ (1) New account for Jorge Lozada-Ramirez, GS with Bushe jTgQV6Y8thcpA Hope that's everything you need. 
Cheers, Jorge 1) Full Name: Jorge Lozada-Ramirez 2) Preferred Login Name: jlozada 3) Alternate Login Name (if preferred unavailable/not allowed): jorgel 4) Group (see notes below): Bushe 5) Preferred e-mail: jlozada@mech.ubc.ca 6) Contact Phone Number: 604 562 8459 7) Preferred Shell (see notes below): bash vi README.USERS 1617 jlozada # Jorge Lozada-Ramirez nu cat<jlozada jlozada:x:1617:1600:Jorge Lozada-Ramirez:/d/vnfe3/home/jlozada:/bin/bash END vnNewUsers jlozada telnet vnfe1 telnet vn35 etc sola; vs # jTgQV6Y8thcpA vnDistEtc shadow ssh root@vnfe1 'cd ~jlozada; echo jlozada@mech.ubc.ca > .forward; chown jlozada.bushe .forward; ls -al; cat .forward' ############################################################ Mon Mar 3 16:14:14 PST 2003 ############################################################ (1) Moved Stefan Avall from /d/vnfe1/home to /d/vnfe1/home2 ############################################################ Tue Mar 4 08:48:03 PST 2003 ############################################################ (1) Compiled amber7 for Avall using intel compilers on bh0, install amber bigsander sander in /usr/local/bin vnallbgCommand 'CP /d/vnfe1/home/matt/amber/sander /usr/local/bin' ############################################################ Wed Mar 5 22:04:12 PST 2003 ############################################################ (1) New account for Bruno Mundim vi README.USERS 570 bruno # Bruno Mundim nu cat<bruno bruno:x:570:600:Bruno Mundim:/d/vnfe1/home/bruno:/bin/tcsh END vnNewUsers bruno telnet vnfe1 telnet vn35 etc sola; vs # BlRw3Ngi7S9nk vnDistEtc shadow ssh root@vnfe1 'cd ~bruno; echo bruno@physics.ubc.ca > .forward; chown bruno.choptuik .forward; ls -al; cat .forward' ############################################################ Mon Mar 24 08:42:10 PST 2003 ############################################################ (1) "Disabling" stocki account, and killing running tasks Old shadow entry 
stocki:$1$O43eQV.M$ub6nJRD.Px6V0PKhbojZB0:11089:0:99999:7:-1:-1:134542800 # Added to ~/.login cat<camortis camortis:x:9073:9000:Cameron Ortis:/d/vnfe1/home/camortis:/bin/bash END vnNewUsers camortis telnet vnfe1 telnet vn35 etc sola; vs # $1$DhyG8Go5$TM3b5A8TvrDhmUsEkTfqI0 vnDistEtc shadow ssh root@vnfe1 'cd ~camortis; echo camortis@interchange.ubc.ca > .forward; chown camortis.other .forward; ls -al; cat .forward' ############################################################ Tue Mar 25 08:40:00 PST 2003 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-03-25-0840 # 11804 11804 224276 # laplace:/usr2/people/matt/system/vnArchive # 11804 11804 224276 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. rmdir Rtop.${TS} LS | grep '^2003:01' | pre CP | post '../Rtop.2003.01' | csh mkdir ../Rtop.2003.02 LS | grep '^2003:02' | pre CP | post '../Rtop.2003.02' | csh mkdir ../Rtop.2003.03 LS | grep '^2003:03' | pre CP | post '../Rtop.2003.03' | csh ############################################################ Tue Mar 25 10:25:56 PST 2003 ############################################################ (1) Rearranging storage on vnfe1:/home # As root@vnfe1 cd /home mv stocki ghlim ehonda dale /home2 ln -s /home2/stocki . ln -s /home2/ghlim . ln -s /home2/dale . # As matt@bh0 mkdir -p /home/matt/system/vnfe1/Rtop cd /d/vnfe1/system/vnshadow mv Rtop.1999* Rtop.2000* Rtop.2001* /home/matt/system/vnfe1/Rtop ############################################################ Tue Apr 22 12:53:42 PDT 2003 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-04-22-1253 # 5162 5162 98078 # laplace:/usr2/people/matt/system/vnArchive # 5162 5162 98078 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. rmdir Rtop.${TS} LS | grep '^2003:03' | pre CP | post '../Rtop.2003.03' | csh mkdir ../Rtop.2003.04 LS | grep '^2003:04' | pre CP | post '../Rtop.2003.04' | csh ############################################################ Wed Apr 30 14:13:11 PDT 2003 ############################################################ (1) New account for Mei Wang, GS with Kendal Bushe, Mech Eng From maggie@mech.ubc.ca Wed Apr 30 13:43:59 2003 Sender: "Mei Wang" Date: Wed, 30 Apr 2003 13:46:58 -0700 From: maggie Organization: ubc X-Accept-Language: zh, en, zh-CN MIME-Version: 1.0 To: choptuik@physics.ubc.ca Subject: request for vn account Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit X-MailScanner-Information: Please contact the ISP for more information X-MailScanner: Found to be clean -- Hi, Please set up an account for me, my information is as below: 1) Full Name:mei wang 2) Preferred Login Name:maggie 3) Alternate Login Name (if preferred unavailable/not allowed):meiwang 4) Group (see notes below):bushe 5) Preferred e-mail:maggie@iam.ubc.ca 6) Contact Phone Number:604-822-2279 7) Preferred Shell (see notes below):tcsh FILL OUT THE FOLLOWING ITEM *ONLY* IF YOU LISTED "OTHER" AS YOUR GROUP 8) Title (Faculty, post-doc, grad student, undergrad ...) and brief description of anticpated usage of cluster. If post-doc or grad student, please include name of research supervisor. I'm a graduate student, my supervisor is kendal bushe. Thanks a lot. 
+++++++++++++++++++++++++++++++++++ regards, wang,mei email: maggie@iam.ubc.ca Department of Mathematics 121-1984 mathematics road vancouver BC,V6T 1Z2,canada Tel(office) : 604-822-2279 icq: 170623228 vi README.USERS 1618 maggie # Mei Wang nu cat<maggie maggie:x:1618:1600:Mei Wang:/d/vnfe3/home/maggie:/bin/tcsh END vnNewUsers maggie telnet vnfe1 telnet vn35 etc sola; vs TODO # vnDistEtc shadow ssh root@vnfe3 'cd ~maggie; echo maggie@iam.ubc.ca > .forward; chown maggie.bushe .forward; ls -al; cat .forward' ############################################################ Fri May 2 10:04:14 PDT 2003 ############################################################ (1) New account for Tom Davis, GS with Marcel Franz, Physics # From physics tdavis:x:457:307:Tom Davis:/home/tdavis:/bin/tcsh tdavis:UPSNPUK9PQWEo:11200::::::-1 vi README.USERS 457 tdavis # Tom Davis (UBC PHAS GS, Marcel Franz) nu cat<tdavis tdavis:x:457:9000:Tom Davis:/d/vnfe1/home/tdavis:/bin/tcsh END vnNewUsers tdavis telnet vnfe1 telnet vn35 etc sola; vs TODO # UPSNPUK9PQWEo vnDistEtc shadow ssh root@vnfe1 'cd ~tdavis; echo tdavis@physics.ubc.ca > .forward; chown tdavis.other .forward; ls -al; cat .forward' ############################################################ Tue May 20 15:58:00 PDT 2003 ############################################################ vnfe3 had problems, required hard re-boot, and there may be a CPU problem ... see README.CRASH (CRASH_123) From scn@warp.physics.ubc.ca Tue May 20 15:43:12 2003 Hey Matt, We're having some troubles with vnfe3. Kevin first alerted me to the problem this afternoon when he noticed that the tape drive was not responding since there were was a zombie 'tar cvf /dev/tape' that could not be killed. After trying everything I could think of (even tried and failed to kill the 'rmt' processes), I "hard ejected" the tape by holding down on the eject button. This ejected the tape, but when I went to ssh to it I couldn't; nor, could I ping it. 
I plugged the monitor and keyboard to it and found that it was completely unresponsive to keyboard input and the monitor displayed many "memory addresses" ala (<4324234482> <43284234332> <0053445893> ...) or something like that. Then, when I rebooted it (a hard reboot), I noticed that immediately after the SCSI BIOS initialization and right before the LILO prompt, the following message was displayed for about 0.5 seconds (it's not verbatim, but I believe it's what is displayed): Error: Processor 1 Error: Processor 2 Error: Processor 1 Failed FRB level 3 timer Error: Processor 2 Failed FRB level 3 timer It booted up fine after checking the disk. I noticed many messages like the following in the logs: May 18 04:02:03 vnfe3 kernel: lockd: couldn't bind to server 142.103.237.225 - retrying. May 18 04:02:38 vnfe3 last message repeated 14 times May 18 04:03:43 vnfe3 last message repeated 26 times May 18 04:04:48 vnfe3 last message repeated 26 times May 18 04:05:53 vnfe3 last message repeated 26 times May 18 04:06:43 vnfe3 last message repeated 21 times May 18 04:06:46 vnfe3 kernel: scsi : aborting command due to timeout : pid 142320027, scsi1, channel 0, id 0, lun 0 Read Block Limits 00 00 00 00 00 May 18 04:06:48 vnfe3 kernel: lockd: couldn't bind to server 142.103.237.225 - retrying. Sorry if I unwittingly caused the problem, but I think it may be a CPU problem because of the message at bootup. I can't search the net from vnfe4 for some reason, so I'll try to look up this message when I go back to bh7. Hopefully, it's not a big problem... scott n. ############################################################ Tue May 27 10:00:04 PDT 2003 ############################################################ (1) Toby Johnson requests installation of updated version of GSL (GNU Scientific Library) Downloaded gsl-1.3.tar.gz Compiles OK on vnfe1, and his code builds OK against the new version. 
Will first install in /usr on one of the nodes # As root@vn53 cdi scp -q root@vnfe1.physics.ubc.ca:/usr/tmp/install/gsl-1.3.tar.gz tar zxf gsl-1.3.tar.gz cd gsl-1.3 configure --prefix=/usr make make install OK cd /tmp cp -r ~johnson/selfing . cd selfing make OK # Hack vnN to exclude vn53 vnallbgCommand 'cdi; scp -q -r root@vn53.physics.ubc.ca:/usr/tmp/install/gsl-1.3 .; cd gsl-1.3; make install' vnallbgCommand 'cd /tmp; cp -r ~johnson/selfing/ .; cd selfing; make; ls' vnallCommand 'cd /tmp/selfing; ls -lt sim' OK ############################################################ Tue Jun 3 11:55:30 PDT 2003 ############################################################ (1) New account for Tudor Costin, UG with Andre Marziali, Physics # From physics tcostin:x:12657:400:Tudor Costin:/home2/tcostin:/bin/tcsh tcostin:Nv3d8whCzlHRA:11575::::::-1 vi README.USERS 12657 tcostin # Tudor Costin (UBC PHAS UG, Marziali) nu cat<tcostin tcostin:x:12657:9000:Tudor Costin:/d/vnfe1/home/tcostin:/bin/tcsh END vnNewUsers tcostin telnet vnfe1 telnet vn35 etc sola; vs TODO # Nv3d8whCzlHRA vnDistEtc shadow ssh root@vnfe1 'cd ~tcostin; echo tcostin@physics.ubc.ca > .forward; chown tcostin.other .forward; ls -al; cat .forward' ############################################################ Tue Jun 3 13:00:00 PDT 2003 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-06-03-1300 # 8068 8068 153292 # laplace:/usr2/people/matt/system/vnArchive # 8068 8068 153292 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. 
rmdir Rtop.${TS} LS | grep '^2003:04' | pre CP | post '../Rtop.2003.04' | csh mkdir ../Rtop.2003.05 LS | grep '^2003:05' | pre CP | post '../Rtop.2003.05' | csh mkdir ../Rtop.2003.06 LS | grep '^2003:06' | pre CP | post '../Rtop.2003.06' | csh ############################################################ Fri Jun 6 09:59:43 PDT 2003 ############################################################ (1) Running job statistics for 11/02 - 12/06 prior to solicitation of information for annual report cd /home/matt/system/vnshadow/Rtop.2002.11 vnAccount -p 30 test -d ../vnAccount.02.11 && /bin/rm -rf ../vnAccount.02.11 mkdir ../vnAccount.02.11 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2002.12 vnAccount -p 30 test -d ../vnAccount.02.12 && /bin/rm -rf ../vnAccount.02.12 mkdir ../vnAccount.02.12 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.01 vnAccount -p 30 test -d ../vnAccount.03.01 && /bin/rm -rf ../vnAccount.03.01 mkdir ../vnAccount.03.01 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.02 vnAccount -p 30 test -d ../vnAccount.03.02 && /bin/rm -rf ../vnAccount.03.02 mkdir ../vnAccount.03.02 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.03 vnAccount -p 30 test -d ../vnAccount.03.03 && /bin/rm -rf ../vnAccount.03.03 mkdir ../vnAccount.03.03 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.04 vnAccount -p 30 test -d ../vnAccount.03.04 && /bin/rm -rf ../vnAccount.03.04 mkdir ../vnAccount.03.04 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.05 vnAccount -p 30 test -d ../vnAccount.03.05 && /bin/rm -rf ../vnAccount.03.05 mkdir ../vnAccount.03.05 mv `LS | grep -v '^200'` !$ cd /home/matt/system/vnshadow/Rtop.2003.06 vnAccount -p 30 test -d ../vnAccount.03.06 && /bin/rm -rf ../vnAccount.03.06 mkdir ../vnAccount.03.06 mv `LS | grep -v '^200'` !$ # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN RM -rf vnAccount.02.11 scp -r 
matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.11 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.02.12 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.01 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.02 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.03 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.04 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.05 . scp -r matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vnshadow/vnAccount.03.06 . # Compiling user list for reporting period April 2002 to March 2003 # As matt@laplace cd /usr2/Public/Members/matt/Doc/VN foreach d (\ vnAccount.02.01 \ vnAccount.02.02 \ vnAccount.02.03 \ vnAccount.02.04 \ vnAccount.02.05 \ vnAccount.02.06 \ vnAccount.02.07 \ vnAccount.02.08 \ vnAccount.02.09 \ vnAccount.02.10 \ vnAccount.02.11 \ vnAccount.02.12 \ vnAccount.03.01 \ vnAccount.03.02 \ vnAccount.03.03 \ vnAccount.03.04 \ vnAccount.03.05 \ vnAccount.03.06 \ ) /bin/ls -1 $d/*.html >> /tmp/1 end foreach f (`cat /tmp/1`) basename $f >> /tmp/2 end cd /usr2/people/matt/private/VN/Report03 sort < /tmp/2 | uniq | dext > USERS.RAW ############################################################ Thu Jun 12 13:50:08 PDT 2003 ############################################################ (1) New account for Aaron Dutton, UBC/PHAS GS, w. 
Courteau # From physics aadutton:x:448:307:Aaron Dutton:/home/aadutton:/bin/tcsh aadutton:8ya8arelU.c22:11647::::::-1 vi README.USERS 448 aadutton # Aaron Dutton (UBC PHAS GS, Courteau) nu cat<aadutton aadutton:x:448:9000:Aaron Dutton:/d/vnfe1/home/aadutton:/bin/tcsh END vnNewUsers aadutton telnet vnfe1 telnet vn35 etc sola; vs TODO # 8ya8arelU.c22 vnDistEtc shadow ssh root@vnfe1 'cd ~aadutton; echo aadutton@astro.ubc.ca > .forward; chown aadutton.other .forward; ls -al; cat .forward' ############################################################ Wed Jun 25 09:11:45 PDT 2003 ############################################################ Nodes vn49-vn64 inclusive have been down for about a day See README.CRASH (CRASH_124) Asbestos workers had flipped breaker. ############################################################ Wed Jun 25 13:12:04 PDT 2003 ############################################################ (1) Moving home directories of rousseab, akeshet, lothar to /home2 ############################################################ Thu Jun 26 09:46:25 PDT 2003 ############################################################ (1) New account for Mona Berciu, UBC/PHAS Faculty # From physics berciu:x:552:307:Mona Berciu:/home/berciu:/bin/tcsh berciu:0SsYfw9iSCpNA:11870:::::: vi README.USERS 552 berciu # Mona Berciu (UBC PHAS Faculty) nu cat<berciu berciu:x:552:9000:Mona Berciu:/d/vnfe1/home/berciu:/bin/tcsh END vnNewUsers berciu telnet vnfe1 telnet vn35 etc sola; vs TODO # 0SsYfw9iSCpNA vnDistEtc shadow ssh root@vnfe1 'cd ~berciu; echo berciu@physics.ubc.ca > .forward; chown berciu.other .forward; ls -al; cat .forward' ############################################################ Thu Jun 26 09:53:08 PDT 2003 ############################################################ (1) Archiving Rtop files ...
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-06-26-0953 # 3922 3922 74518 # laplace:/usr2/people/matt/system/vnArchive # 3922 3922 74518 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. rmdir Rtop.${TS} LS | grep '^2003:06' | pre CP | post '../Rtop.2003.06' | csh ############################################################ Mon Jun 30 09:44:26 PDT 2003 ############################################################ vn14 is down vn14 down 2+04:59 see README.CRASH (CRASH_125) ############################################################ Wed Jul 2 09:12:16 PDT 2003 ############################################################ vn25 is down vn25 down 1:03 see README.CRASH (CRASH_126) # TO DO # Secondary installation on vn25 ############################################################ Mon Jul 7 ############################################################ vnfe[13], vn1-vn16 down due to accidental breaker throw. 
see README.CRASH (CRASH_127) ############################################################ Tue Jul 8 09:29:35 PDT 2003 ############################################################ (1) Complete cluster shutdown prior to physical relocation to small co-lo room foreach u (petryk davis fransp rree suqin cwlai) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end foreach u (lothar murray rucker jfn maggie wkb) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end vnallbgCommand "ps -elf | grep wkb | grep -v grep | nth 4 | pre kill -9 | csh" vnallbgCommand "ps -elf | grep fengxs | grep -v grep | nth 4 | pre kill -9 | csh" ############################################################ Thu Jul 10 17:13:58 PDT 2003 ############################################################ (1) Starting license manager on vnfe3 ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ############################################################ Fri Jul 11 17:35:43 PDT 2003 ############################################################ From scn@warp.physics.ubc.ca Wed Jul 9 13:42:28 2003 vn9 vn16 vn39 down after cluster relocation with bad power supplies? vn9, vn39 and vn16 seem to have bad power supply fans (the fan that is housed in the power supply unit at the rear of the computer). the power supply in vn16 was VERY hot and prevented the node from booting up. ############################################################ Fri Jul 11 17:39:04 PDT 2003 ############################################################ (1) Secondary installation on vn25 # As root@vn25 set path=/root test -f /root/.tcshrc && mv /root/.tcshrc /root/.tcshrc.O test -f /root/.vimrc && mv /root/.vimrc /root/.vimrc.O alias get 'scp -r root@vnfe1:/root/\!* .' cd mv .ssh .ssh.O get .ssh get .cshrc get .aliases get .exrc get .rhosts cd /etc alias get 'scp -r root@vnfe1:/etc/\!* .' 
get csh.cshrc get hosts.allow get hosts.deny get hosts cp passwd passwd.O cp shadow shadow.O get passwd get shadow get inetd.conf killall -HUP inetd test -f /etc/rc.d/rc.local && cp /etc/rc.d/rc.local /etc/rc.d/rc.local.O scp root@vnfe1:/etc/rc.d/rc.local /etc/rc.d cd /etc scp root@vnfe1:/etc/ntp.conf . cd /etc cp fstab fstab.O vi /etc/fstab # BEGIN add vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 bh2:/home /d/bh2/home nfs rw,bg,hard,intr 0 0 bh7:/home2 /d/bh7/home2 nfs rw,bg,hard,intr 0 0 # END add mkdir -p /d/{vnfe1,vnfe2,vnfe3}/{home,home2} mkdir -p /d/bh7/home2 mkdir -p /d/bh2/home mount -a cd /etc scp root@vn1:/etc/auto.misc . /etc/rc.d/init.d/autofs start # As root@vn1 cd /usr tar cf local.tar local ftp vn25 # Login as matt cd /tmp put local.tar # As root@vn25 cd /usr mv local local.O tar xf /tmp/local.tar vnSetdate ntpd # Daemons linuxconf --text # Duplicated settings on vn1 vnMpptest vn24 vn25 # Seems to be OK ############################################################ Fri Jul 11 18:21:06 PDT 2003 ############################################################ (1) Post-move tests of cluster vnCommand 'ps -elf | grep ntp | grep -v grep; ntptimeset' vnCommand 'mount -a; df; df | wc' # bh7:/home2 not uniformly mounted vnnallbgCommand 'cd /etc; CP fstab fstab.O; CP /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab' vnCommand 'mount -a; df; df | wc' # OK ############################################################ Fri Jul 11 18:35:10 PDT 2003 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-07-11-1835 # 1629 1629 30951 # laplace:/usr2/people/matt/system/vnArchive # 1629 1629 30951 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. rmdir Rtop.${TS} LS | grep '^2003:06' | pre CP | post '../Rtop.2003.06' | csh mkdir ../Rtop.2003.07 LS | grep '^2003:07' | pre CP | post '../Rtop.2003.07' | csh ############################################################ Mon Jul 14 10:28:14 PDT 2003 ############################################################ (1) UPS powering vn7-vn8, vn18-vn29 went off-line, Dave Jones diagnosed as twist connector not being locked. vn25 didn't come back, apparently same problem as previously. Should reinstall on spare disk. See README.CRASH (CRASH_131) ############################################################ Mon Jul 14 20:11:31 PDT 2003 ############################################################ (1) Secondary installation on vn25 (redux) # As root@vn25 chsh -s /bin/tcsh test -f /root/.tcshrc && mv /root/.tcshrc /root/.tcshrc.O test -f /root/.vimrc && mv /root/.vimrc /root/.vimrc.O alias get 'scp -r root@vnfe1:/root/\!* .' cd mv .ssh .ssh.O get .ssh get .cshrc get .aliases get .exrc get .rhosts cd /etc alias get 'scp -r root@vnfe1:/etc/\!* .' get csh.cshrc get hosts.allow get hosts.deny get hosts cp passwd passwd.O cp shadow shadow.O get passwd get shadow get inetd.conf killall -HUP inetd test -f /etc/rc.d/rc.local && cp /etc/rc.d/rc.local /etc/rc.d/rc.local.O scp root@vnfe1:/etc/rc.d/rc.local /etc/rc.d cd /etc scp root@vnfe1:/etc/ntp.conf . 
cd /etc cp fstab fstab.O vi /etc/fstab # BEGIN add vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 bh2:/home /d/bh2/home nfs rw,bg,hard,intr 0 0 bh7:/home2 /d/bh7/home2 nfs rw,bg,hard,intr 0 0 # END add mkdir -p /d/{vnfe1,vnfe2,vnfe3}/{home,home2} mkdir -p /d/bh7/home2 mkdir -p /d/bh2/home mount -a cd /etc scp root@vn1:/etc/auto.misc . /etc/rc.d/init.d/autofs start # As root@vn1 cd /usr tar cf local.tar local ftp vn25 # Login as matt cd /tmp put local.tar # As root@vn25 cd /usr mv local local.O tar xf /tmp/local.tar killall sshd # As matt@vn1 telnet vn25 # login as matt su /usr/local/sbin/sshd vnSetdate ntpd # Daemons linuxconf --text # Duplicated settings on vn1 vnMpptest vn24 vn25 # Seems to be OK ############################################################ Thu Jul 17 16:49:15 PDT 2003 ############################################################ (1) New account for Veronica Zimmerman UBC/MECH UG (summer student) with Bushe vi README.USERS nu cat<veronica veronica:x:1619:1600:Veronica Zimmerman:/d/vnfe3/home/veronica:/bin/tcsh END vnNewUsers veronica telnet vnfe1 telnet vn35 etc sola; vs TODO # uu5V7Jn9FoDss vnDistEtc shadow ssh root@vnfe1 'cd ~veronica; echo veronica@mech.ubc.ca > .forward; chown veronica.bushe .forward; ls -al; cat .forward' ############################################################ Thu Jul 24 09:24:27 PDT 2003 ############################################################ (1) New accounts for summer school students # From root@lnx2 grep ss0 /etc/passwd ss0:x:9000:33:Summer school account:/d/lnx2/home/ss0:/bin/tcsh grep ss0 /etc/passwd ss0:183FQC3TPQq1w:12250:0:99999:7::: nu cat>ss0<> fstab' vnallbgCommand 'mkdir -p /d/vnfe4/home; mount vnfe4:/home' 
############################################################ Sun Jul 27 18:03:21 PDT 2003 ############################################################ (1) Bill U. commenting on state of /tmp on nodes. I am noticing that there are numerous /tmp directories on the vn nodes whith GB of data which seem to have been there for a year by now. Are the /tmp directories cleaned out periodicaly? (I am at present of course adding to this pollution, but hope to get the stuff out of there quickly.) vnNbgCommand '(cd /tmp; hostname; df .; usage) > /tmp/USAGE &' vnCommand 'cat /tmp/USAGE' > /tmp/USAGE vn1 95% unruh vn2 97% luisl unruh vn3 97% luisl unruh vn4 82% luisl unruh vn5 86% unruh luisl vn17 83% fransp vn63 87% fransp luisl vn64 91% fransp luisl ############################################################ Tue Jul 29 15:45:57 PDT 2003 ############################################################ (1) Making "-O2" the default setting for CFLAGS in /etc/csh.cshrc Appears to cure the problem with extra parens with mod(...) in RNPL. ############################################################ Wed Aug 13 16:38:09 PDT 2003 ############################################################ (1) New account for Igor Tupitsyn, Philip Stamp's collaborator. # From physics tupitsyn:x:550:307:Igor Tupitsyn:/home/tupitsyn:/bin/tcsh tupitsyn:ZK.6/zhMluKos:12192:::::: vi README.USERS 550 tupitsyn # Igor Tupitsyn (UBC PHAS Collaborator [Stamp]) nu cat<tupitsyn tupitsyn:x:550:9000:Igor Tupitsyn:/d/vnfe1/home/tupitsyn:/bin/bash END vnNewUsers tupitsyn telnet vnfe1 telnet vn35 etc sola; vs TODO # ZK.6/zhMluKos vnDistEtc shadow ssh root@vnfe1 'cd ~tupitsyn; echo tupitsyn@physics.ubc.ca > .forward; chown tupitsyn.other .forward; ls -al; cat .forward' ############################################################ Wed Aug 20 09:38:09 PDT 2003 ############################################################ (1) Igor's jobs are killing vnfe1 via NFS tcpdump | grep nfs | grep -v reply | nth 2 | upto \. 
| tee /tmp/foo sort < /tmp/foo | uniq # Need to add following to /etc/profile TMP=/tmp/${USER} test -d $TMP || mkdir $TMP ############################################################ Sun Aug 24 10:43:48 PDT 2003 ############################################################ (1) vn41 has high load factor (32), Mona has 40+ processes running on it, but apparently only one is using a substantial amount of CPU time ############################################################ Sat Aug 30 17:16:45 PDT 2003 ############################################################ (1) Archiving Rtop files ... cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2003-08-30-1717 # 9218 9218 175142 # laplace:/usr2/people/matt/system/vnArchive # 9218 9218 175142 cd vnArchive RM -r Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. 
rmdir Rtop.${TS} LS | grep '^2003:07' | pre CP | post '../Rtop.2003.07' | csh mkdir ../Rtop.2003.08 LS | grep '^2003:08' | pre CP | post '../Rtop.2003.08' | csh ############################################################ Tue Sep 2 16:02:33 PDT 2003 ############################################################ (1) New account for Martin Snajdr vi README.USERS 635 bruno # Martin Snajdr nu cat<msnajdr msnajdr:x:635:600:Martin Snajdr:/d/vnfe1/home/msnajdr:/bin/tcsh END vnNewUsers msnajdr telnet vnfe1 telnet vn35 etc sola; vs # VYA2s/C9tt2Ao vnDistEtc shadow ssh root@vnfe1 'cd ~msnajdr; echo msnajdr@physics.ubc.ca > .forward; chown msnajdr.choptuik .forward; ls -al; cat .forward' ############################################################ Mon Sep 8 18:56:35 PDT 2003 ############################################################ (1) Re-installation on vn25 incomplete, need modified kernel, /etc/lilo.conf ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn25 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn25 'vnnewK' ssh root@vn25 'cat /etc/lilo.conf' ssh root@vn25 /sbin/lilo # TO DO reboot Fri Sep 12 07:56:21 PDT 2003 # OK ############################################################ Oct 4 2003 ############################################################ (1) New account for Cortland Griswold vi README.USERS 9074 griswold # Cortland Griswold (UBC ZOOL GS, Whitlock super) nu cat<griswold griswold:x:9074:9000:Cortland Griswold:/d/vnfe1/home/griswold:/bin/bash END vnNewUsers griswold telnet vnfe1 telnet vn35 etc sola; vs # eZh/3yCNu2WdI vnDistEtc shadow ssh root@vnfe1 'cd ~griswold; echo griswold@zoology.ubc.ca > .forward; chown griswold.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sat Oct 11 13:29:41 PDT 2003 ############################################################ (1) 
New account for Sung Hoon Park vi README.USERS 1620 shpark # Sung Hoon Park nu cat<shpark shpark:x:1620:1600:Sung Hoon Park:/d/vnfe3/home/shpark:/bin/bash END vnNewUsers shpark telnet vnfe1 telnet vn35 TODO etc sola; vs # jirHnjrSmAS/k vnDistEtc shadow ssh root@vnfe1 'cd ~shpark; echo shpark@mech.ubc.ca > .forward; chown shpark.bushe .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Oct 15 08:55:49 PDT 2003 ############################################################ (1) vn25 down again, see README.CRASH (CRASH_135) ############################################################ Mon Oct 20 08:53:05 PDT 2003 ############################################################ (1) and again, see README.CRASH (CRASH_136) ############################################################ Mon Oct 20 08:53:05 PDT 2003 ############################################################ (1) and again, see README.CRASH (CRASH_137) Time to replace disk? 
############################################################ Thu Nov 6 13:55:58 PST 2003 ############################################################ (1) vn44 down, accidentally powered down vn45, see CRASH_138, CRASH_139 ############################################################ Fri Nov 7 09:37:38 PST 2003 ############################################################ (1) New account for Roland Stevenson vi README.USERS 642 roland # Roland Stevenson nu cat<roland roland:x:642:600:Roland Stevenson:/d/vnfe1/home/roland:/bin/tcsh END vnNewUsers roland telnet vnfe1 telnet vn35 etc sola; vs # Dd6qU28yOYyuw vnDistEtc shadow ssh root@vnfe1 'cd ~roland; echo roland@physics.ubc.ca > .forward; chown roland.choptuik .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Nov 7 09:37:54 PST 2003 ############################################################ (1) New account for Krzysztof Michalak vi README.USERS 9075 michalak # Krzysztof Michalak (UBC MECH GS, Ollivier-Gooch super) nu cat<michalak michalak:x:9075:9000:Krzysztof Michalak:/d/vnfe3/home/michalak:/bin/bash END vnNewUsers michalak telnet vnfe1 telnet vn35 etc sola; vs # $1$15l94EB9$QbxjniYG3H1osKw.WgvfP. vnDistEtc shadow ssh root@vnfe1 'cd ~michalak; echo michalak@mech.ubc.ca > .forward; chown michalak.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Nov 7 15:10:24 PST 2003 ############################################################ (1) New account for Ilya Elfimov # From physics elfimov:x:554:307:Ilya Elfimov:/home/elfimov:/bin/tcsh elfimov:9JCm.QfdFAJSA:11884:::::: vi README.USERS 554 elfimov # Ilya Elfimov (UBC PHAS PDF, Sawatzky super.) 
nu cat<elfimov elfimov:x:554:9000:Ilya Elfimov:/d/vnfe1/home/elfimov:/bin/bash END vnNewUsers elfimov telnet vnfe1 telnet vn35 etc sola; vs # 9JCm.QfdFAJSA vnDistEtc shadow ssh root@vnfe1 'cd ~elfimov; echo elfimov@physics.ubc.ca > .forward; chown elfimov.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Mon Nov 17 09:48:43 PST 2003 ############################################################ (1) vn17 down, see README.CRASH (CRASH_140) ############################################################ Sat Nov 29 17:18:53 PST 2003 ############################################################ (1) Secondary installation on vn25 following replacement of hard drive and reinstallation of Mandrake 6.1 # As root@vn25 chsh -s /bin/tcsh test -f /root/.tcshrc && mv /root/.tcshrc /root/.tcshrc.O test -f /root/.vimrc && mv /root/.vimrc /root/.vimrc.O alias get 'scp -r root@vnfe1:/root/\!* .' cd mv .ssh .ssh.O get .ssh get .cshrc get .aliases get .exrc get .rhosts cd /etc alias get 'scp -r root@vnfe1:/etc/\!* .' get csh.cshrc get hosts.allow get hosts.deny get hosts cp passwd passwd.O cp shadow shadow.O get passwd get shadow get inetd.conf killall -HUP inetd test -f /etc/rc.d/rc.local && cp /etc/rc.d/rc.local /etc/rc.d/rc.local.O scp root@vnfe1:/etc/rc.d/rc.local /etc/rc.d cd /etc scp root@vnfe1:/etc/ntp.conf . cd /etc cp fstab fstab.O vi /etc/fstab # BEGIN add vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 bh2:/home /d/bh2/home nfs rw,bg,hard,intr 0 0 bh7:/home2 /d/bh7/home2 nfs rw,bg,hard,intr 0 0 # END add mkdir -p /d/{vnfe1,vnfe2,vnfe3}/{home,home2} mkdir -p /d/bh7/home2 mkdir -p /d/bh2/home mount -a cd /etc scp root@vn1:/etc/auto.misc .
/etc/rc.d/init.d/autofs start # As root@vn17 cd /usr tar cf local.tar local ftp vn25 # Login as matt cd /tmp put local.tar # As root@vn25 cd /usr mv local local.O tar xf /tmp/local.tar # AT THIS POINT vn25 APPARENTLY CRASHED killall sshd # As matt@vn17 telnet vn25 # login as matt su /usr/local/sbin/sshd vnSetdate ntpd # Daemons linuxconf --text # Duplicated settings on vn1 vnMpptest vn24 vn25 # Seems to be OK ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn25 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn25 'vnnewK' ssh root@vn25 'cat /etc/lilo.conf' ssh root@vn25 /sbin/lilo # TO DO reboot ###--------------------------------------------------------- ### ssh update ###--------------------------------------------------------- cdi; /d/vnfe1/home/matt/scripts/install-openssh-vn; ############################################################ Fri Dec 12 09:43:18 PST 2003 ############################################################ (1) Secondary installation on vn25 following replacement of memory and reinstall of Mandrake 6.1 # As root@vn25 chsh -s /bin/tcsh test -f /root/.tcshrc && mv /root/.tcshrc /root/.tcshrc.O test -f /root/.vimrc && mv /root/.vimrc /root/.vimrc.O alias get '/usr/local/bin/scp -r root@vnfe1:/root/\!* .' cd mv .ssh .ssh.O get .ssh get .cshrc get .aliases get .exrc get .rhosts cd /etc alias get 'scp -r root@vnfe1:/etc/\!* .' get csh.cshrc get hosts.allow get hosts.deny get hosts cp passwd passwd.O cp shadow shadow.O get passwd get shadow get inetd.conf killall -HUP inetd test -f /etc/rc.d/rc.local && cp /etc/rc.d/rc.local /etc/rc.d/rc.local.O scp root@vnfe1:/etc/rc.d/rc.local /etc/rc.d cd /etc scp root@vnfe1:/etc/ntp.conf .
cd /etc cp fstab fstab.O vi /etc/fstab # BEGIN add vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe2:/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe2:/home2 /d/vnfe2/home2 nfs rw,bg,hard,intr 0 0 vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 bh2:/home /d/bh2/home nfs rw,bg,hard,intr 0 0 #bh7:/home2 /d/bh7/home2 nfs rw,bg,hard,intr 0 0 vnfe4:/home /d/vnfe4/home nfs rw,bg,hard,intr 0 0 # END add mkdir -p /d/{vnfe1,vnfe2,vnfe3}/{home,home2} mkdir -p /d/vnfe4/home mkdir -p /d/bh7/home2 mkdir -p /d/bh2/home mount -a cd /etc scp root@vn1:/etc/auto.misc . /etc/rc.d/init.d/autofs start # As root@vn17 cd /usr tar cf local.tar local ftp vn25 # Login as matt cd /tmp put local.tar # As root@vn25 cd /usr mv local local.O tar xf /tmp/local.tar # AT THIS POINT vn25 APPARENTLY CRASHED killall sshd # As matt@vn17 telnet vn25 # login as matt su /usr/local/sbin/sshd vnSetdate ntpd # Daemons linuxconf --text # Duplicated settings on vn1 vnMpptest vn24 vn25 # Seems to be OK ###--------------------------------------------------------- ### Kernel update ###--------------------------------------------------------- ssh root@vn25 'cd /etc; cp lilo.conf lilo.conf.orig; cat lilo.conf.orig' ssh root@vn25 'vnnewK' ssh root@vn25 'cat /etc/lilo.conf' ssh root@vn25 /sbin/lilo reboot # Kernel panic again, Kevin has probably reinstalled with different # partitioning scheme, copying lilo.conf makes system unbootable boot: linux-new single boot=/dev/hda2 vi /etc/lilo.conf hda1 -> hda2 /sbin/lilo ############################################################ Fri Dec 12 11:30:32 PST 2003 ############################################################ (1) PG compilers need restart vnfeCommand 'killall -9 lmgrd; killall -9 pgroupd' vnfeCommand 'killall -9 lmgrd; killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm 
/usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit # Had to manually remove lock file, /usr/tmp/lockpgroupd, on vnfe1 ############################################################ Wed Jan 28 18:03:53 PST 2004 ############################################################ (1) New account for Bei Jing # From physics vi README.USERS 554 elfimov # Ilya Elfimov (UBC PHAS PDF, Sawatzky super.) nu cat<jinbei jinbei:x:1621:1600:Bei Jin:/d/vnfe3/home/jinbei:/bin/bash END vnNewUsers jinbei telnet vnfe1 telnet vn35 TODO etc sola; vs # $1$h3T.axgj$VCcAHs83fkKZLQDCBum/Q0 vnDistEtc shadow ssh root@vnfe1 'cd ~jinbei; echo jinbei@mech.ubc.ca > .forward; chown jinbei.bushe .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Feb 25 14:47:28 PST 2004 ############################################################ (1) New account for Chris Hemming # From physics vi README.USERS 1255 chemming # Chris Hemming nu cat<chemming chemming:x:1255:1200:Chris Hemming:/d/vnfe2/home/chemming:/bin/bash END vnNewUsers chemming telnet vnfe1 telnet vn35 TODO etc sola; vs # 187XjpsqS54K6 vnDistEtc shadow ssh root@vnfe1 'cd ~chemming; echo chemming@chem.ubc.ca > .forward; chown chemming.patey .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Feb 25 15:35:50 PST 2004 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ # As matt@vnfe4 cd vnArchive setenv TS 2004-02-25-1536 tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2004-02-25-1536 # 26208 26208 497952 # Untar on bh0 # laplace:/usr2/people/matt/system/vnArchive # 26208 26208 497952 cd vnArchive RM -r Rtop.${TS} mkdir -p /home/matt/system/vnshadow/Rtop.2004 mkdir -p /home/matt/system/vnshadow/Rtop.2003.11 mkdir -p /home/matt/system/vnshadow/Rtop.2003.12 mkdir -p /home/matt/system/vnshadow/Rtop.2004.01 mkdir -p /home/matt/system/vnshadow/Rtop.2004.02 mkdir -p /home/matt/system/vnshadow/Rtop.2004.03 mkdir -p /home/matt/system/vnshadow/Rtop.2004.04 # As matt@vnfe4 setenv TS 2004-02-25-1536 cd /home/matt/system/vnshadow/Rtop.2003 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. rmdir Rtop.${TS} LS | grep '^2004' | pre CP | post /home/matt/system/vnshadow/Rtop.2004 | csh LS | grep '^2003:10' | pre CP | post '../Rtop.2003.10' | csh LS | grep '^2003:11' | pre CP | post '../Rtop.2003.11' | csh LS | grep '^2003:12' | pre CP | post '../Rtop.2003.12' | csh cd /home/matt/system/vnshadow/Rtop.2004 LS | grep '^2004:01' | pre CP | post '../Rtop.2004.01' | csh LS | grep '^2004:02' | pre CP | post '../Rtop.2004.02' | csh ############################################################ Tue Mar 2 13:54:27 PST 2004 ############################################################ (1) Adding /usr/local/share/texmf tree to .tex configuration # As root@vnfe1 vi /usr/share/texmf/web2c/texmf.cnf # Added/modified % Local tree TEXMFLOCAL = /usr/local/share/texmf TEXMF = {!!,$TEXMFLOCAL,!!$TEXMFMAIN} mkdir -p /usr/local/share/texmf/tex/latex cd !$ scp -r matt@bh0.physics.ubc.ca:/usr/local/share/texmf/tex/latex/misc . 
texhash # As root@vnfe{23} scp root@vnfe1:/usr/share/texmf/web2c/texmf.cnf /usr/share/texmf/web2c/texmf.cnf mkdir -p /usr/local/share/texmf/tex/latex scp -r matt@bh0.physics.ubc.ca:/usr/local/share/texmf/tex/latex/misc /usr/local/share/texmf/tex/latex texhash # As matt@vnfe1 etc cp /usr/share/texmf/web2c/texmf.cnf . ############################################################ Wed Mar 3 22:31:19 PST 2004 ############################################################ (1) New account for Justin Hsu, UG with Plotlin # From physics jwhsu:x:12992:400:Justin Wei-Chen Hsu:/home2/jwhsu:/bin/tcsh jwhsu:Tl9WYXuS7e212:11939:::::: vi README.USERS 12992 jwhsu # Justin Wei-Chen Hsu nu cat<jwhsu jwhsu:x:12992:9000:Justin Hsu:/d/vnfe1/home/jwhsu:/bin/bash END vnNewUsers jwhsu telnet vnfe1 telnet vn35 TODO etc sola; vs # Tl9WYXuS7e212 vnDistEtc shadow ssh root@vnfe1 'cd ~jwhsu; echo jwhsu@interchnage.ubc.ca > .forward; chown jwhsu.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Mar 10 18:13:27 PST 2004 ############################################################ (1) Moving Justin Hsu to /d/vnfe1/home2 # As root@vnfe1 mv /d/vnfe1/home/jwhsu /d/vnfe1/home2 # As matt@vnfe1 etc scp root@vnfe1:/etc/passwd passwd.vnfe1 diff !$ passwd ############################################################ Mon Mar 29 10:56:34 PST 2004 ############################################################ (1) New account for Willen Atsma, Mech Eng GS with Hodgson # From physics vi README.USERS 9076 watsma # Willem Atsma (UBC MECH GS, Hodgson) nu cat<watsma watsma:x:9076:9000:Willem Atsma:/d/vnfe3/home2/watsma:/bin/bash END vnNewUsers watsma telnet vnfe1 telnet vn35 TODO etc sola; vs # $1$4VvNAUtx$/YN2XF63aHcDLdvYDTeDo. 
vnDistEtc shadow ssh root@vnfe3 'cd ~watsma; echo watsma@mech.ubc.ca > .forward; chown watsma.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Tue Apr 13 19:12:01 PDT 2004 ############################################################ (1) New account for John Homenuke vi README.USERS 13224 jhomenuk # John Homenuke (UBC PHAS UG) nu cat<jhomenuk jhomenuk:x:13224:600:John Nicholas Homenuke:/d/vnfe1/home2/jhomenuk:/bin/tcsh END vnNewUsers jhomenuk telnet vnfe1 telnet vn35 etc sola; vs # B6iicC9kTBsfY vnDistEtc shadow ssh root@vnfe1 'cd ~jhomenuk; echo jhomenuk@physics.ubc.ca > .forward; chown jhomenuk.choptuik .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sun Apr 18 17:27:27 PDT 2004 ############################################################ (1) vn20 has been down for about a day, see README.CRASH (CRASH_144) ############################################################ Mon May 24 06:54:18 PDT 2004 ############################################################ (1) vn22 has been down for some time (I'm currently in Golm) vn.physics.ubc.ca Compute Node Status: Mon May 24 06:45:00 PDT 2004 The following nodes are down: 1: vn22 down 2+14:35 see README.CRASH (CRASH_145) ############################################################ Fri May 28 02:02:34 PDT 2004 ############################################################ (1) Disabled setenv PATH "${kdepath}:${PATH}" in /etc/profile.d/kde.csh since it was prepending /usr/bin to $PATH, which led to /usr/bin/pgcc being found by default rather than $PGI/../pgcc ############################################################ Wed Jun 2 11:09:35 PDT 2004 ############################################################ (1) Moving some users from vnfe1:/home to vnfe1:/home2 # As root@vnfe1 setenv USERS "\ berciu \ roland \ zming \ plotkin \ istairs \ hiranya \ promislow \ sdewekker \ rcoope \ " 
foreach u (`echo $USERS`) echo "u=$u" test -d /home/$u && echo "/home/$u exists" test -d /home2/$u && echo "/home2/$u exists" test -d /home2/$u || echo "/home2/$u does not exist" test -d /home2/$u || mv /home/$u /home2 end foreach u (`echo $USERS`) echo "u=$u" test -d /home2/$u && echo "/home2/$u exists" test -d /home/$u || echo "/home/$u does not exist" test -d /home2/$u && ln -s /home2/$u /home end ############################################################ Wed Jun 2 16:57:11 PDT 2004 ############################################################ (1) Archiving Rtop files ... # Should implement 'ts' script facility that sets returns time # stamps across machines cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ # As matt@vnfe4 cd vnArchive setenv TS 2004-06-02-1657 tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2004-06-02-1657 # 10957 10957 208183 # As matt@bh0 setenv TS 2004-06-02-1657 cd system/vnArchive cp /d/vnfe4/home/matt/system/vnArchive/Rtop.${TS}.tar.gz . tar zxf Rtop.${TS}.tar.gz cd Rtop.${TS} LS | wc # 10957 10957 208183 RM -rf Rtop.${TS} # As matt@vnfe4 cd vnArchive RM -r Rtop.${TS} mkdir -p /home/matt/system/vnshadow/Rtop.2004.03 mkdir -p /home/matt/system/vnshadow/Rtop.2004.04 mkdir -p /home/matt/system/vnshadow/Rtop.2004.05 mkdir -p /home/matt/system/vnshadow/Rtop.2004.06 # As matt@vnfe4 setenv TS 2004-06-02-1657 cd /home/matt/system/vnshadow/Rtop.2004 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2004 LS | grep '^2004:02' | pre CP | post '../Rtop.2004.02' | csh LS | grep '^2004:03' | pre CP | post '../Rtop.2004.03' | csh LS | grep '^2004:04' | pre CP | post '../Rtop.2004.04' | csh LS | grep '^2004:05' | pre CP | post '../Rtop.2004.05' | csh LS | grep '^2004:06' | pre CP | post '../Rtop.2004.06' | csh ############################################################ Thu Jun 10 05:26:55 PDT 2004 ############################################################ (1) vn11 needs reboot, see README_CRASH (CRASH_146) ############################################################ Wed Jun 16 19:50:46 PDT 2004 ############################################################ (1) vn52 needs reboot, see README_CRASH (CRASH_147) ############################################################ Wed Jun 23 10:07:19 PDT 2004 ############################################################ (1) vn34 needs reboot, see README_CRASH (CRASH_148) The following nodes are down: 1: vn34 down 22:31 and perhaps do a bit of sleuthing re cause of crashes (common user) ############################################################ Sat Jun 26 10:06:19 PDT 2004 ############################################################ (1) Moving jasonz from vnfe3:/home to vnfe4:/home2 # As root@vnfe3 cd /home mv jasonz /home2 ln -s /home2/jasonz /home/jasonz ############################################################ Wed Jul 7 07:40:24 PDT 2004 ############################################################ (1) New account for Ryan Shannon (UBC PHYS UG, Heyl) # As choptuik@physics.ubc.ca sudo pwentry rmshanno:x:12314:400:Ryan Marshall Shannon:/home2/rmshanno:/bin/tcsh rmshanno:6d5cGKzZlStQU:11207::::::-1 vi README.USERS 12314 rmshanno # Ryan Shannon (UBC PHYS UG, Heyl) nu cat<rmshanno rmshanno:x:12314:9000:Ryan Marshall Shannon:/d/vnfe1/home2/rmshanno:/bin/tcsh END vnNewUsers rmshanno telnet vnfe1 telnet vn35 etc sola; vs # 6d5cGKzZlStQU vnDistEtc shadow ssh root@vnfe1 'cd 
~rmshanno; echo rmshanno@physics.ubc.ca > .forward; chown rmshanno.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Jul 9 15:32:49 PDT 2004 ############################################################ (1) vn11 down, not ping-able vn11 down 8:00 see README.CRASH (CRASH_149) Rebooted, crashed again -> shop ############################################################ Fri Jul 9 15:32:49 PDT 2004 ############################################################ (1) vn22 down, pinable, reboot see README.CRASH (CRASH_150) ############################################################ Fri Aug 6 10:04:04 PDT 2004 ############################################################ (1) New account for Ngo Van Thanh (ITP Hanoi, PDF) vi README.USERS 9077 nvthanh # Ngo Van Thanh (ITP Hanoi, PDF) nu cat<nvthanh nvthanh:x:9077:9000:Ngo Van Thanh:/d/vnfe1/home2/nvthanh:/bin/bash END vnNewUsers nvthanh telnet vnfe1 telnet vn35 etc sola; vs # $1$nfqWj0Zy$i7y5P4Jm39toH7uNpsqid. 
vnDistEtc shadow ssh root@vnfe1 'cd ~nvthanh; echo nvthanh@iop.vast.ac.vn > .forward; chown nvthanh.other .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sat Aug 7 09:32:49 PDT 2004 ############################################################ (1) New account for Andrew Jason Penner vi README.USERS 689 ajpenner # Andrew Jason Penner nu cat<ajpenner ajpenner:x:689:600:Andrew Jason Penner:/d/vnfe1/home2/ajpenner:/bin/tcsh END vnNewUsers ajpenner telnet vnfe1 telnet vn35 etc sola; vs # vTRev1W0igUD2 vnDistEtc shadow ssh root@vnfe1 'cd ~ajpenner; echo ajpenner@physics.ubc.ca > .forward; chown ajpenner.choptuik .forward; ls -al; cat .forward' DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Aug 11 11:14:40 PDT 2004 ############################################################ (1) New account for Tian Si Wang (UBC PHAS GS, Young) # From physics.ubc.ca sudo pwentry tswang tswang:x:490:307:Tian Si Wang:/home2/tswang:/bin/tcsh tswang:XjFHfhOxpvxNM:12297:::::: vi README.USERS 490 tswang # Tian Si Wang nu cat<tswang tswang:x:490:9000:Tian Si Wang:/d/vnfe1/home2/tswang:/bin/tcsh END vnNewUsers tswang telnet vnfe1 telnet vn35 etc sola; vs # XjFHfhOxpvxNM vnDistEtc shadow ssh root@vnfe1 'cd ~tswang; echo tswang@physics.ubc.ca > .forward; chown tswang.other .forward; ls -al; cat .forward' TODO DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Aug 13 13:42:53 PDT 2004 ############################################################ (1) vnfe1:/home is 100% full. 
Apparently davis is culprit, usage has grown to about 2GB from very little this AM # Relocate peters cd /home mv peters /home2 ln -s /home2/peters /home # Kill all davis jobs vnallbgCommand 'killall base' vnallbgCommand 'killall base_2' vnallbgCommand 'killall base_fixedh' # Update /etc/motd, web page and send mail to davis davis@zoology.ubc.ca ############################################################ Mon Aug 16 10:46:41 PDT 2004 ############################################################ # (1) In course of getting RNPL trivial parallel job going on # vnfe1, remember that now advocate *NO* -Msecond_underscore # with pgf77 etc. and -fno_second_underscore with gnu. # In an attempt to break as few things as possible, will add # new scripts /usr/local/PGI/bin/mpif77_ /usr/local/PGI/bin/mpif90_ # which do *not* use -Msecond_underscore # As root@vnfe1 cd /usr/local/PGI/bin cp mpif77 mpif77_ cp mpif90 mpif90_ vi mpif*_ # Deleted all occurrences of -Msecond_underscore grep second_un mpif*_ # ... 
linking seems to be picking up /usr/local/lib/librnpl.a instead of # /usr/local/PGI/lib/librnpl.a --- fixed by adding -L/usr/local/PGI/lib # Should re-install all of PGI stuff without -Msecond_underscore, for time # being, /usr/local/PGI/lib/libpmpich_.a /usr/local/PGI/lib/libmpich_.a ssh root@vnfe1 vnMPImakePG_ ssh root@vnfe2 vnMPImakePG_ ssh root@vnfe3 vnMPImakePG_ # As root@vnfe[123] cd /var/tmp/install/PG_/mpich/build/LINUX/ch_p4/lib foreach l (lib*.a) set stem=`echo $l | dext` echo $stem cp $stem.a /usr/local/PGI/lib/${stem}_.a end TODO # However, seems to be some problem with propagation of arguments in # Fortran programs ############################################################ Thu Aug 19 18:45:17 PDT 2004 ############################################################ (1) New account for Ben(jamin) Gutierrez # From physics.ubc.ca sudo pwentry benjamin benjamin:x:694:307:Benjamin Gutierrez:/home2/benjamin:/bin/tcsh benjamin:p/9yuV5eZh6N.:12648:::::: vi README.USERS 694 benjamin # Benjamin Gutierrez nu cat<benjamin benjamin:x:694:600:Benjamin Gutierrez:/d/vnfe1/home2/benjamin:/bin/tcsh END vnNewUsers benja telnet vnfe1 telnet vn35 etc sola; vs # p/9yuV5eZh6N. 
vnDistEtc shadow ssh root@vnfe1 'cd ~benjamin; echo benjamin@physics.ubc.ca > .forward; chown benjamin.choptuik .forward; ls -al; cat .forward' TODO DUPLICATED ACCOUNT ON vnfe4 ############################################################ Mon Aug 23 11:51:01 PDT 2004 ############################################################ (1) vn1 down vn1 down 1+13:50 # Pal is going over to the cluster # see README.CRASH (CRASH_151) ############################################################ Mon Aug 23 15:15:45 PDT 2004 ############################################################ vnallCommand grep Failed /var/log/messages > /tmp/mess ############################################################ Wed Aug 25 14:34:23 PDT 2004 ############################################################ (1) vn25 down # Pal caught this one and will reboot. # see README.CRASH (CRASH_152) # POWER SUPPLY REPLACED ############################################################ Wed Sep 1 17:44:47 PDT 2004 ############################################################ (1) New account for Paul Koerber, (UBC PHAS PDF, Van Raamsdonk) # From physics.ubc.ca sudo pwentry koerber koerber:x:698:307:Paul Koerber:/home/koerber:/bin/tcsh koerber:wEh.v.IcDVr/k:12654:::::: vi README.USERS 698 koerber # Paul Koerber nu cat<koerber koerber:x:698:9000:Paul Koerber:/d/vnfe1/home2/koerber:/bin/tcsh END vnNewUsers koerber telnet vnfe1 telnet vn35 etc sola; vs # wEh.v.IcDVr/k vnDistEtc shadow ssh root@vnfe1 'cd ~koerber; echo koerber@physics.ubc.ca > .forward; chown koerber.other .forward; ls -al; cat .forward' TODO DUPLICATED ACCOUNT ON vnfe4 ############################################################ Mon Sep 6 14:20:49 PDT 2004 ############################################################ 1) vnfe2:/home 100% full, chemming likely culprit. 
chemming daub overduin # major users # Move liam monojoy to /d/vnfe2/home2 # As root@vnfe2 set U=liam mv /home/$U /home2 ln -s /home2/$U /home set U=monojoy mv /home/$U /home2 ln -s /home2/$U /home ############################################################ Mon Sep 13 12:25:14 PDT 2004 ############################################################ (1) New account for Alistair Blachford (UBC ZOOL/MATH GS, Doebeli) vi README.USERS 9078 alistair # Alistair Blachford (UBC ZOOL/MATH GS, Doebeli) nu cat<alistair alistair:x:9078:9000:alistair:/d/vnfe1/home2/alistair:/bin/bash END vnNewUsers alistair telnet vnfe1 telnet vn35 etc sola; vs # PCfnlgFp1RK8Q vnDistEtc shadow ssh root@vnfe1 'cd ~alistair; echo alistair@zoology.ubc.ca > .forward; chown alistair.other .forward; ls -al; cat .forward' TODO DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Sep 15 09:56:37 PDT 2004 ############################################################ 1) vnfe1:/home was entirely filled beginning yesterday noon Moved /d/vnfe1/home/shawley -> /d/vnfe2/home/shawley and left symbolic link. # As root@vnfe1 mv /d/vnfe1/home/shawley /d/vnfe2/home/shawley ln -s !$ . mv /d/vnfe1/home/inaki /d/vnfe4/home/inaki/inaki-vnfe1 ln -s !$ inaki 2) Want to drain cluster and then reboot, put up motd.2004.09.15 ################################################################################ SEPTEMBER 15: 10:00 AM: PLEASE LET THIS CLUSTER DRAIN. DO NOT START ANY FURTHER PROCESSES UNTIL FURTHER NOTICE. Once the cluster is idle, it will be rebooted. Most of the machines have been up for a human gestation period, which is too long for this brand of Unix, so it's time to simulate a power outage! YOUR COOPERATION IN STOPPING PROCESSES ASAP IF POSSIBLE IS APPRECIATED. MANAGEMENT APOLOGIZES FOR THE INCONVENIENCE THIS WILL CAUSE. ALSO, vnfe1:/home WAS COMPLETELY FILLED STARTING YESTERDAY AFTERNOON. SEE THE CLUSTER WEB PAGE FOR MORE INFORMATION. 
################################################################################ ############################################################ Wed Sep 15 09:56:37 PDT 2004 ############################################################ # 1) Killing Ngo ...'s job and locking his account foreach u (nvthanh) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end foreach u (nvthanh) vnCommand 'ps -elf | grep $u | grep -v grep' end # As root@vnfe1 usermod -L nvthanh # As matt@vnfe1 etc sola vs ############################################################ Sun Sep 19 11:46:41 PDT 2004 ############################################################ # 1) Time for reboot, after telling me "a couple of days on # the phone" then going incommunicado for ? days +, alistair # is still running, no? foreach u (alistair tupitsyn) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end foreach u (alistair tupitsyn) vnCommand "ps -elf | grep $u | grep -v grep" end # Nfs-mounts # As root@vnfe4 'df | grep 'vnfe[123]' | nth last | pre umount | tcsh; df' # As matt@bh0 bhCommand "df | grep 'vnfe[123]' | nth last | pre umount | tcsh" # As matt@vnfe1 vnCommand "df | grep 'vnfe[123]' | nth last | pre umount | tcsh" vnCommand 'ps -elf | cut -d" " -f 3' | sort | uniq > /tmp/alluids foreach u (michalak petryk roland) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end foreach u (michalak petryk roland) vnCommand "ps -elf | grep $u | grep -v grep" end # Take matt out of /tmp/alluids vnCommand 'ps -elf | cut -d" " -f 3' | sort | uniq > /tmp/alluids1 vnCommand 'df | grep "vnfe[123]" | nth last | pre umount | tcsh; df' > /tmp/mount vnCommand 'umount -a -t nfs; df' | tee /tmp/mount1 vnCommand 'killall -9 wave; ps -elf | grep wave | grep -v grep' vnCommand 'umount -a -t nfs; df' | tee /tmp/mount2 /bin/rm /tmp/mount* vnfeCommand 'umount -a -t nfs; df' | tee /tmp/mount3 ssh root@vnfe2 reboot exit ssh root@vnfe3 reboot exit ssh 
root@vnfe1 reboot exit # Back after all of the disk checks forced because they had exceeded the # O/S limit # As matt@vnfe1 vnfeCommand 'df' vnfeCommand 'df|grep vnfe|wc|nth 1' # 3 x 5 OK vnCommand "df | grep -v '^/dev/hd' | grep -v '^File'" | tee /tmp/mount vnCommand '(sleep 2; reboot); sleep 1' # Sun Sep 19 11:55:35 PDT 2004 ping vn64 # As root@vnfe1 vnfeCommand '/bin/rm /var/spool/rwho/*' vnCommand 'mount -a; df; date' # Change vnSetdate to get time from bh0 vnCommand 'vnSetdate' # ssh not running on vn60 vnCommand 'killall ntpd; ntpd; vnSetdate' vnCommand 'ntptimeset' vnfeCommand 'cd /root/TestPGI; make clean; make; ls' # License manager not running vnfeCommand 'killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit vnfeCommand 'cd /root/TestPGI; make clean; make; ls' # OK # As root@vnfe2 killall xntpd ntpd # As matt@vnfe1 # Updated motd etc cp motd.2004.09.15 motd.2004.09.19 vi motd.2004.09.19 cp motd.2004.09.19 motd vnDistEtc motd # As matt@bh0 # Updated Web page vnh allCommand 'mount -a; df | grep vnfe' Sun Sep 19 13:29:56 PDT 2004 # Out of machine room (into the sunshine)! Sun Sep 19 14:01:08 PDT 2004 # Had to hack Rtop, had only been displaying top 127 processes 131 -> 132 ############################################################ Mon Sep 20 11:47:37 PDT 2004 ############################################################ # 1) Need to rebuild PGI libraries *without* -Msecond_underscore, # then change documentation, /etc/csh.cshrc cd /usr/local/PGI ls MPI-2-C++/ build/ examples/ info/ man/ mpiuninstall* bin/ doc/ include/ lib/ mpe_examples/ util/ # First install rnpletal, then mpich # As matt@vnfe1 cd /home/matt/system/vn/image/master/install Arc *PG* vi *PG* # As matt@bh0 # Hacked on /d/bh0/home/matt/autoconf/rnpletal/Install.pgi.PIII cd /etc find . 
-type f -exec grep -l second_ {} \; # As root@vnfe[123] vnMPImakePG_ # OK # Now addressing Scott's problem with 'texmex' # As matt@bh0 cd debug/shawley cp -r /home/matt/debug/shawley/texmex . cd texmex poptp source /d/vnfe1/home/matt/scripts/soPG-parallel make # OK ############################################################ Thu Oct 7 09:12:46 PDT 2004 ############################################################ # New account for Matthias Huber (Visiting GS, UBC PHAS, Plotkin) # From physics matthias:x:466:307:Matthias Huber:/home/matthias:/bin/tcsh matthias:Ei9RTd2EhpAkU:12564:::::: vi README.USERS 466 matthias # Matthias Huber nu cat<matthias matthias:x:466:9000:matthias:/d/vnfe1/home2/matthias:/bin/bash END vnNewUsers matthias telnet vnfe1 telnet vn35 etc sola; vs # Ei9RTd2EhpAkU vnDistEtc shadow ssh root@vnfe1 'cd ~matthias; echo matthias@physics.ubc.ca > .forward; chown matthias.other .forward; ls -al; cat .forward' TODO DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Oct 8 17:19:05 PDT 2004 ############################################################ (1) Adding Ingrid's disks to automount configuration etc vi auto.misc #coop -rw,hard,intr cooperon.physics.ubc.ca:/export/work #psr1 -rw,hard,intr ariel.astro.ubc.ca:/export/psr1 ASTRO -ro,hard,intr ariel.astro.ubc.ca:/export/astro_lnx vnDistEtc auto.misc vnallbgCommand '/etc/rc.d/init.d/autofs start' ############################################################ Sat Oct 9 20:30:30 PDT 2004 ############################################################ # New account for Mike Vitalo (UT Austin, PHYS UG (Hawley)) vi README.USERS 9079 mvitalo # Michael Joseph Vitalo (UT Austin/PHYS UG, Matzner/Hawley) nu setenv NU mvitalo cat<${NU} ${NU}:x:9079:9000:Michael Joseph Vitalo:/d/vnfe1/home2/${NU}:/bin/bash END vnNewUsers ${NU} telnet vnfe1 telnet vn35 etc sola; vs # o570lfltkPRRs vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@mail.utexas.edu > .forward; chown ${NU}.other 
.forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sun Oct 17 17:59:45 PDT 2004 ############################################################ # 1) matthias has filled up /d/vnfe1/home2 # mv akeshet, berciu # As root@vnfe1 RM /home/akeshet mv /home2/akeshet /home RM /home/berciu mv /home2/berciu /home foreach u (matthias) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end ############################################################ Wed Oct 20 17:04:00 PDT 2004 ############################################################ # Ingrid having problems apparently related to move of # home directory, symlink probably doesn't include # /d/vnfe1 ... yup # As root@vnfe1 cd /home RM istairs ln -s /d/vnfe1/home2/istairs . # Revisit attempts to mount scarlatti disks etc view auto.misc pulsar1 -ro,hard,intr scarlatti.astro.ubc.ca:/data1/pulsar pulsar2 -ro,hard,intr scarlatti.astro.ubc.ca:/data2/pulsar vnallbgCommand '/etc/rc.d/init.d/autofs start' ping scarlatti.astro.ubc.ca PING scarlatti.astro.ubc.ca (142.103.236.140): 56 data bytes 64 bytes from 142.103.236.140: icmp_seq=0 ttl=61 time=1.2 ms # OK ############################################################ Wed Oct 20 21:37:16 PDT 2004 ############################################################ # New account for Laura Kasian (UBC PHAS Grad (Stairs)) # From physics kasian:x:704:307:Laura Kasian:/home/kasian:/bin/tcsh kasian:Q1jcNcpa7eMG2:12660:::::: vi README.USERS 704 kasian # Laura Kasian (UBC PHAS Grad (Stairs)) nu setenv NU kasian cat<${NU} ${NU}:x:704:9000:Laura Kasian:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} telnet vnfe1 telnet vn35 etc sola; vs # Q1jcNcpa7eMG2 vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Oct 20 21:39:46 PDT 2004 
############################################################ # New account for Steve Begin (UBC PHAS Grad (Stairs)) # From physics sbegin:x:530:307:Steve Begin:/home/sbegin:/bin/tcsh sbegin:VUHWdE46LLsUE:12292:::::: vi README.USERS 530 sbegin # Steve Begin (UBC PHAS GS (Stairs)) nu setenv NU sbegin cat<${NU} ${NU}:x:530:9000:Steve Begin:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} telnet vnfe1 telnet vn35 etc sola; vs # VUHWdE46LLsUE vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Oct 20 21:41:36 PDT 2004 ############################################################ # New account for Brian Martin (UBC PHAS UG (Choptuik)) # From physics brianmar:x:12476:400:Brian Martin:/home/brianmar:/bin/tcsh brianmar:O9a37VNe5l8QU:11220::::::-1 vi README.USERS 12476 brianmar # Brian Martin (UBC PHAS UB (Choptuik)) nu setenv NU brianmar cat<${NU} ${NU}:x:12476:600:Brian Martin:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} telnet vnfe1 telnet vn35 etc sola; vs # O9a37VNe5l8QU vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sat Oct 23 08:11:06 PDT 2004 ############################################################ # Sat Oct 23 08:10:33 PDT 2004 # vn16 down again, needs to go into the shop # See README.CRASH (CRASH_157) # Varsity can find nothing wrong cat /etc/motd ###################################################################### ###################################################################### ### ### ### This node has been somewhat unstable recently. Users are ### ### advised to avoid its use if possible. 
### ### ### ###################################################################### ###################################################################### ############################################################ Sat Oct 30 17:29:16 PDT 2004 ############################################################ # Pal reports problems with NFS on vnfe2 (actually reports # problems mounting). # # Real mess. Repeated 'killall mount''s on the bh machines still # leaves mount request at vnfe2, per /var/log/messages, every few # seconds or so. Shutdown all NFS on bh machines (except bh0), then # restart NFS on vnfe2 # During this process becomes apparent that bh9 needs a reboot. bhCommand 'mount -av' # See matt@bh0:~/system/bh/README for full details, required # reboots of bh[0-9] plus metered (approx one per minute max) # mounts of vnfe2:{/home,/home2} to get vnfe2's disks mounted # on all bh machines ############################################################ Sat Oct 30 17:29:16 PDT 2004 ############################################################ # Fixing up dangling symbolic links cd /d/vnfe1/home foreach u ( ghlim hiranya lothar peters plotkin promislow rcoope savall sdewekker stocki zming) RM $u ln -s /d/vnfe1/home2/$u end cd /d/vnfe3/home foreach u ( jasonz ) RM $u ln -s /d/vnfe3/home2/$u end ############################################################ Tue Nov 9 16:16:03 PST 2004 ############################################################ # vnfe2 gone into shop, Pal copied vnfe2{/home,/home2} to # /d/vnfe4/home/vnfe2-tmp vnnallbgCommand 'mv /d/vnfe2 /d/vnfe2.O; ln -s /d/vnfe4/home/vnfe2-tmp /d/vnfe2' vnfeCommand 'mv /d/vnfe2 /d/vnfe2.O; ln -s /d/vnfe4/home/vnfe2-tmp /d/vnfe2' ############################################################ Wed Nov 10 13:48:26 PST 2004 ############################################################ # 3 node crashes in past 24 hours vn34 vn44 vn47 From psandhu@physics.ubc.ca Wed Nov 10 13:49:10 2004 Date: Wed, 10 Nov 2004 13:15:22 -0800 (PST) 
From: Pal Sandhu To: Matthew W. Choptuik Subject: vn44 Hi Matt, this is Pal. I rebooted vn44. It looks like it went down last night at around 10pm. Also vn34 and vn47 went down last night at about 7pm but I had roland reboot those last night. # Needs to be investigated ############################################################ Wed Nov 10 15:25:57 PST 2004 ############################################################ # And vn62 # Roland? Whomever, cease and desist. # Looks like could be Roland # Sent message # As idle@vnfe1 RTOP # Looks like vn62 is down again ############################################################ Mon Nov 15 14:10:58 PST 2004 ############################################################ # vnfe2 is back up. Remove current links to /d/vnfe4, and # restore vnfe2 mounts viw vnfeN # Disabled vnfe2 allCommand 'rmdir /d/vnfe2/home.O; rmdir /d/vnfe2/home2.O' allCommand 'umount -l /d/vnfe2/home; umount -l /d/vnfe2/home2' allCommand 'mv /d/vnfe2/home /d/vnfe2/home.O; mv /d/vnfe2/home2 /d/vnfe2/home2.O; mkdir /d/vnfe2/home; mkdir /d/vnfe2/home2' rrvi /etc/fstab allCommand 'umount vnfe2:/home; umount vnfe2:/home2' allCommand 'mount -a; df | grep vnfe2' # OK # Update /etc/motd ############################################################ Tue Nov 16 22:13:40 PST 2004 ############################################################ # vnfe2 down, back up with monitor on to catch possible # console messages (kernel panic) vnnallbgCommand 'umount vnfe2:/home; umount vnfe2:/home2' vnCommand 'df | grep vnfe2' >>> Executing as matt@142.103.237.20 Disconnecting: Corrupted MAC on input. 
# Restarted sshd vnnallbgCommand 'cd /etc; Arc fstab; CP /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node.real fstab; mount -a; df | grep vnfe2' ~matt/scripts/renew-vnfe2-mounts #!/bin/sh -x P=`basename $0` die(){ echo "$P: $1" exit 1 } warn(){ echo "$P: $1" } test -d /d/vnfe2 && mv /d/vnfe2 /d/vnfe2.O mkdir -p /d/vnfe2/home /d/vnfe2/home2 || die "Could not make /d/vnfe2/{home,home2}" mount -a df | grep vnfe2 END vnallbgCommand renew-vnfe2 # Problems with vn1 ... ############################################################ Wed Nov 24 07:44:53 PST 2004 ############################################################ # vnfe2 incommunicado again, so memory reseating did NOT # completely fix problem ############################################################ Fri Nov 26 16:00:03 PST 2004 ############################################################ # vnfe2 now up with vn29's memory (vn29 thus down), need # to restart PGI license servers vnfeCommand 'killall -9 pgroupd' ssh root@vnfe1 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe2 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit ssh root@vnfe3 su adm /usr/local/pgi/linux86/bin/lmgrd.rc start exit vnfeCommand 'cd /root/TestPGI; make clean; make; ls' # Didn't do the trick for compilation on lnx! 
############################################################ Sun Dec 5 09:24:52 PST 2004 ############################################################ # New account for Tim Blair (UBC UG, PHYS 410) # From physics tjblair:x:12965:400:Timothy John Blair:/home2/tjblair:/bin/tcsh tjblair:mfe.IfJYGM6nE:11935:::::: vi README.USERS 12965 tjblair # Timothy John Blair (UBC PHAS UG (PHYS410/Choptuik)) nu setenv NU tjblair cat<${NU} ${NU}:x:12965:20000:Timothy John Blair:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} telnet vnfe1 telnet vn35 etc sola; vs # mfe.IfJYGM6nE vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > .forward; chown ${NU}.phys410 .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Dec 10 09:15:51 PST 2004 CRASH_164 vn2 down 13:23 # In machine room, fan not running, nor on vn3. # Power supply most likely on vn2 # Immediate action ... check all fans etc. # See README.CRASH (CRASH_164) ############################################################ Fri Dec 10 09:25:42 PST 2004 ############################################################ CRASH_165 # vn3: Fan not running, but machine is, Take down and # offline possibly for local diagnosis and service # See README.CRASH (CRASH_165) ############################################################ Fri Dec 10 13:52:09 PST 2004 ############################################################ # TODO # New account for Derek Liu # From physics mcliu:x:12182:400:Derek Man Chun Liu:/home2/mcliu:/bin/tcsh mcliu:W9ZEIU4ivNLuc:12747::::::-1 vi README.USERS 12182 mcliu # Derek Man Chun Liu (UBC PHAS UG (PHYS410/Choptuik)) nu setenv NU mcliu cat<${NU} ${NU}:x:12182:20000:Derek Man Chun Liu:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} # As root@vnfe1 cp ~phys410/.cshrc ~mcliu; chown mcliu.phys410 ~mcliu/.cshrc telnet vnfe1 telnet vn35 etc sola; vs # W9ZEIU4ivNLuc vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > 
.forward; chown ${NU}.phys410 .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Dec 10 18:44:07 PST 2004 ############################################################ # Inventory of dead fans From psandhu@physics.ubc.ca Fri Dec 10 18:43:26 2004 Date: Fri, 10 Dec 2004 14:11:16 -0800 (PST) From: Pal Sandhu To: Matthew W. Choptuik Cc: Martin Snajdr Subject: Re: Power supplies on old cluster. Hi Matt, this is Pal. We have 16 front fans not working and 3 power supply fans not working( vn15 ,vn23 and vnfe3). I have shut down vn15 and vn23 but have left vnfe3 running for now. All the others I have left up but I can easily shut them down if you want. I have ordered all the replacements from varsity and they should be here next week. Just for your information the computers that are having problems are: Power supply: vnfe3, vn15, vn23 Front fans: 32,31,42,43,54,55,58,50,51,52,48,6,15,9,11,18,27 later..Pal ############################################################ Tue Dec 14 09:37:40 PST 2004 ############################################################ # Looks like Jeremy's two racks of SUN Optera (?) are here! ############################################################ Thu Dec 16 11:55:59 PST 2004 ############################################################ # TODO # New account for Jonathan Nakane, actually already has an # account from PHYS410 # As root@vnfe1 su jnakane cd ~jnakane mkdir .ssh chmod og-rwx .ssh cd .ssh scp root@lnx1:~jnakane/.ssh/authorized_keys . 
############################################################ Thu Dec 16 12:12:45 PST 2004 ############################################################ # Prepare for tomorrow's shutdown, 10 AM ################################################################# # THURSDAY, DECEMBER 16, 12:30 PM # # # # *** IMPORTANT NOTICE *** # # # # PLEASE READ IN ENTIRETY AND, MOST IMPORTANTLY, ENSURE # # THAT ALL PROCESSES CURRENTLY RUNNING ON THE CLUSTER ARE # # TERMINATED BY TOMORROW MORNING AT 9:00 AM. # # # # The front end machines vnfe1 and vnfe3 will be down for # # hardware work from approximately 9:00 AM to noon TOMORROW, # # FRIDAY, DECEMBER 17. # # # # At this time, the two SCSI disks that were previously # # housed in vnfe2 will be installed in vnfe1 and vnfe3 (one # # each), and vnfe2 will at least temporarily cease to be part # # of the cluster. Since vnfe1, vnfe2 and vnfe3 provided # # a three way redundancy that was never needed (i.e. we never # # had more than one front-end node down at a time), this # # change should be essentially transparent to users. # # # # AGAIN AS MENTIONED ABOVE, PLEASE KILL ALL OF YOUR JOBS # # RUNNING ON THE CLUSTER *BEFORE* 9AM TOMORROW. THIS # # SAVES US TIME AND AGGRAVATION AND MAKES IT LESS LIKELY # # THAT WE'LL ACCIDENTALLY REMOVE YOUR HOME DIRECTORY AND # # ALL OF ITS CONTENTS. # # # # Thanks in advance for your cooperation and contact Pal # # (psandhu@physics.ubc.ca) or Matt (choptuik@physics.ubc.ca) # # should you have any questions/gripes about this matter.
# ################################################################# vnNbgCommand 'scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node.new /etc/fstab.new' vnNCommand 'cat /etc/fstab.new' ############################################################ Tue Dec 21 09:22:19 PST 2004 ########################################################### # PGI compilers down # Restarted, OK # Update motd ############################################################ Tue Dec 21 10:45:44 PST 2004 ############################################################ # Various and sundry fan replacements, plus vn45 went down # See README.CRASH (CRASH_166 - CRASH_175 approximate :-)) # vnfe2:/home2 is apparently dead, link to /d/vnfe4/home/vnfe2-tmp/home2 vnallbgCommand 'mount -a; df | grep vnfe4' vnCommand 'ls -lt /d/vnfe4/home/vnfe2-tmp/home2' vnCommand 'ls -ltR /d/vnfe2/home2' vnCommand '/bin/rm -r /d/vnfe2/home2' vnCommand 'ln -s /d/vnfe4/home/vnfe2-tmp/home2 /d/vnfe2/home2' cat > motd.2004.12.21 < /d/vnfe2/home not quite right. 
Need # vnfe1:/home3/home -> /d/vnfe2/home # As root@vnfe3 vi /etc/fstab vnfe1:/home /d/vnfe1/home nfs rw,bg,hard,intr 0 0 vnfe1:/home2 /d/vnfe1/home2 nfs rw,bg,hard,intr 0 0 vnfe1:/home3/home /d/vnfe2/home nfs rw,bg,hard,intr 0 0 vnfe4:/home /d/vnfe4/home nfs rw,bg,hard,intr 0 0 # As matt@vnfe1 etc vi fstab.node vnNbgCommand 'cd /etc; Arc fstab; CP /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab' vnNbgCommand 'umount /d/vnfe2/home' vnNbgCommand 'mount -a' vnCommand 'ls ~liam' ############################################################ Fri Dec 10 13:52:09 PST 2004 ############################################################ # New account for Bryan Kelleher (UBC PHAS PDF Unruh) # From physics kelleher:x:271:307:Bryan Kelleher:/home/kelleher:/bin/tcsh kelleher:uadLtj2954.1M:12706:::::: vi README.USERS 271 kelleher # Bryan Kelleher (UBC PHAS PDF Unruh) nu setenv NU kelleher cat<${NU} ${NU}:x:271:800:Bryan Kelleher:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} # As root@vnfe1 cp ~phys410/.cshrc ~kelleher; chown kelleher.unruh ~kelleher/.cshrc telnet vnfe1 telnet vn35 etc sola; vs # uadLtj2954.1M vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo ${NU}@physics.ubc.ca > .forward; chown ${NU}.unruh .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 # OK DUPLICATED ACCOUNT ON bh0 # OK DUPLICATED ACCOUNT ON lnx[123] # OK ############################################################ Fri Dec 24 02:02:01 PST 2004 ############################################################ # Need to change http://laplace.physics.ubc.ca/VN/DISKUSAGE # vis a vis vnfe2's demise # As root@vnfe1 crontab -l # DO NOT EDIT THIS FILE - edit the master and reinstall. 
# (/tmp/crontab.18388 installed on Wed Nov 10 15:59:27 2004) # (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) # Format of lines: #min hour daymo month daywk cmd # Weekly backup of cluster machines #45 23 * * 1 /backups/scripts/weekly-backup # Level-0 backups (done once a month instead of weekly backup). #45 23 * * 1 /backups/scripts/vnfe1,2-level-0-backup # Reminder to do level-0's. #00 07 01 * * echo "Time to do the cluster's level-0s" | mail cwlai@physics.ubc.ca # Kill vlasov jobs 0,5,10,15,20,25,30,35,40,45,50,55 * * * * /d/vnfe1/home/matt/scripts/killall-vlasov # Get node status (load factors) and export to laplace.physics.ubc.ca Web pages 0,15,30,45 * * * * /d/vnfe1/home/matt/scripts/vnStatus ; /d/vnfe1/home/matt/scripts/vnPigs 0 05 * * * /d/vnfe1/home/matt/scripts/vnUsage # Check those pesky PGI compilers, and restart if necessary #0,10,20,30,40,50 * * * * /d/vnfe1/home/matt/scripts/vnTestPGI X #23 14,17,20,23,02,05,08,11 * * * /backups/trim # This script gets executed all day and will send an e-mail if one of the nodes is down 00 * * * * /root/ifdown #---------------------------------------------------------------------------------------------------- # As matt@bh0 cds Arc vnUsage vi !$ # Added third argument to functions to label summaries # As root@vnfe1 vnUsage ############################################################ Mon Jan 3 09:27:13 PST 2005 ############################################################ # Remove vnfe2 from rwho database # As matt@vnfe1 vnCommand '/bin/rm -f /var/spool/rwho/whod.vnfe2' ############################################################ Wed Jan 5 13:00:55 PST 2005 ############################################################ (1) Archiving Rtop files ... 
cd vn setenv TS `tsminus` echo $TS mv Rtop Rtop.${TS} mkdir Rtop mv Rtop.${TS} ../vnArchive cd !$ # As matt@vnfe4 cd vnArchive setenv TS 2005-01-05-1301 tar cf Rtop.${TS}.tar Rtop.${TS} gzip Rtop.${TS}.tar # Rtop.2005-01-05-1301 # 26282 26282 499364 # As matt@bh0 setenv TS 2005-01-05-1301 cd system/vnArchive cp /d/vnfe4/home/matt/system/vnArchive/Rtop.${TS}.tar.gz . tar zxf Rtop.${TS}.tar.gz cd Rtop.${TS} LS | wc # 26282 26282 499364 RM -rf Rtop.${TS} # As matt@vnfe4 cd vnArchive RM -r Rtop.${TS} mkdir -p /home/matt/system/vnshadow/Rtop.2004.07 mkdir -p /home/matt/system/vnshadow/Rtop.2004.08 mkdir -p /home/matt/system/vnshadow/Rtop.2004.09 mkdir -p /home/matt/system/vnshadow/Rtop.2004.10 mkdir -p /home/matt/system/vnshadow/Rtop.2004.11 mkdir -p /home/matt/system/vnshadow/Rtop.2004.12 mkdir -p /home/matt/system/vnshadow/Rtop.2005.01 # As matt@vnfe4 setenv TS 2005-01-05-1301 cd /home/matt/system/vnshadow/Rtop.2004 tar zxf ../../vnArchive/Rtop.${TS}.tar.gz cd Rtop.${TS} LS | pre mv | post .. | csh cd .. 
rmdir Rtop.${TS} cd /home/matt/system/vnshadow/Rtop.2004 LS | grep '^2004:07' | pre CP | post '../Rtop.2004.07' | csh LS | grep '^2004:08' | pre CP | post '../Rtop.2004.08' | csh LS | grep '^2004:09' | pre CP | post '../Rtop.2004.09' | csh LS | grep '^2004:10' | pre CP | post '../Rtop.2004.10' | csh LS | grep '^2004:11' | pre CP | post '../Rtop.2004.11' | csh LS | grep '^2004:12' | pre CP | post '../Rtop.2004.12' | csh LS | grep '^2005:01' | pre CP | post '../Rtop.2005:01' | csh mkdir -p /home/matt/system/vnshadow/Rtop.2005 cd /home/matt/system/vnshadow/Rtop.2004 mv 2005:* /home/matt/system/vnshadow/Rtop.2005 cd /home/matt/system/vnshadow/Rtop.2005 ############################################################ Thu Jan 13 09:47:45 PST 2005 ############################################################ # Further preparations for upgrade # Parallel run # As matt@vnfe1 cda tar zxf mpich-examples.tar.gz cd mpich-examples poptp make cd basic Mpirun 64 cpi # OK --- takes a while to start up, but what the hey? # vis a vis problems with Heyl's cluster time Mpirun 21 cpi . . . Starting top on bracketing machines Warning: Permanently added 'vn32' (RSA) to the list of known hosts. 
Process 0 of 21 on vn1.physics.ubc.ca pi is approximately 3.1415926544231274, Error is 0.0000000008333343 wall clock time = 0.005423 Process 20 of 21 on vn32.physics.ubc.ca Process 15 of 21 on vn28.physics.ubc.ca Process 2 of 21 on vn11.physics.ubc.ca Process 1 of 21 on vn10.physics.ubc.ca Process 6 of 21 on vn2.physics.ubc.ca Process 14 of 21 on vn27.physics.ubc.ca Process 3 of 21 on vn17.physics.ubc.ca Process 5 of 21 on vn19.physics.ubc.ca Process 4 of 21 on vn18.physics.ubc.ca Process 7 of 21 on vn20.physics.ubc.ca Process 11 of 21 on vn24.physics.ubc.ca Process 9 of 21 on vn22.physics.ubc.ca Process 8 of 21 on vn21.physics.ubc.ca Process 10 of 21 on vn23.physics.ubc.ca Process 12 of 21 on vn25.physics.ubc.ca Process 13 of 21 on vn26.physics.ubc.ca Process 17 of 21 on vn3.physics.ubc.ca Process 16 of 21 on vn29.physics.ubc.ca Process 18 of 21 on vn30.physics.ubc.ca Process 19 of 21 on vn31.physics.ubc.ca 0.270u 0.810s 0:04.45 24.2% 0+0k 0+0io 20940pf+0w 0.310u 0.150s 0:07.07 6.5% 0+0k 0+0io 4386pf+0w ############################################################ Fri Jan 14 14:54:00 PST 2005 ############################################################ # T minus something hours, now have new 4-port KVM (doesn't # need power is the claim!) and two 15' cable sets. Unwrap # one, the other is currently on vn11, TODO: Packaging on UPS. # KVM switch should be mounted on the monitor somehow. Will # friction affix for the time being, will only use original # and one set of new cables. Powering off monitor. Fri Jan 14 15:14:27 PST 2005 # ... and we're back with KVM functionality. 15' cord is # just fine tied it off to the mid-back of the cluster. # Top level Makefile now has an export function, which exports # to a WEB location near you. .IGNORE: default: toreboot XWEB="matt@bh0.physics.ubc.ca:/Public/Members/matt/System/VN-10.1" XFILES=README export: $(XFILES) echo "Exporting README by default!!"
scp $(XFILES) $(XWEB) toreboot: test -f README.TOREBOOT && scp README.TOREBOOT matt@laplace:/Public/People/matt/Doc/VN/TOREBOOT ssh matt@laplace 'cd /usr2/Public/Members/matt/Doc/VN; ./wr' ############################################################ Fri Jan 14 16:51:23 PST 2005 ############################################################ # vn25 was incommunicado since sshd wasn't running since # user sshd didn't exist. This IS fixed in installation # script so following connection of vn25 to KVM switch, # I execute # As root@vn25 ssh matt@bh0 post-install | bash # is now rebooting, with luck will come back alive # OK # As root@vn24 # Same story, so my guys didn't follow instructions and # get a fresh script, they used an OLD script that DOESN'T # deal with the sshd issue (SIGH!!), same deal ssh matt@bh0 post-install | bash # OK cda tar zxf mpich-examples.tar.gz cd mpich-examples poptp make cd basic Mpirun 64 cpi # Will execute 'ssh vn1 cd /d/vnfe1/home/matt/autoconf/mpich-examples/basic; time mpirun -np 64 -machinefile mfile cpi ' Will use the following machine file vn10 vn11 vn17 vn18 vn19 vn2 vn20 vn21 vn22 vn23 vn25 vn26 vn27 vn28 vn29 vn3 vn30 vn31 vn32 vn33 vn34 vn35 vn36 vn37 vn38 vn39 vn4 vn40 vn41 vn42 vn43 vn44 vn45 vn46 vn47 vn48 vn49 vn5 vn50 vn51 vn52 vn53 vn54 vn55 vn56 vn57 vn6 vn62 vn63 vn64 vn7 vn8 vn9 vn24 vn16 vn15 vn12 vn13 vn14 vn58 vn60 vn61 vn59 Starting top on bracketing machines konsole: cannot connect to X server konsole: cannot connect to X server Process 0 of 64 on vn1.physics.ubc.ca pi is approximately 3.1415926544231265, Error is 0.0000000008333334 wall clock time = 0.016907 Process 2 of 64 on vn11.physics.ubc.ca Process 56 of 64 on vn15.physics.ubc.ca Process 60 of 64 on vn58.physics.ubc.ca Process 30 of 64 on vn42.physics.ubc.ca Process 61 of 64 on vn60.physics.ubc.ca Process 59 of 64 on vn14.physics.ubc.ca Process 58 of 64 on vn13.physics.ubc.ca Process 57 of 64 on vn12.physics.ubc.ca Process 24 of 64 on vn37.physics.ubc.ca Process 
44 of 64 on vn55.physics.ubc.ca Process 29 of 64 on vn41.physics.ubc.ca Process 55 of 64 on vn16.physics.ubc.ca Process 54 of 64 on vn24.physics.ubc.ca Process 46 of 64 on vn57.physics.ubc.ca Process 28 of 64 on vn40.physics.ubc.ca Process 48 of 64 on vn62.physics.ubc.ca Process 52 of 64 on vn8.physics.ubc.ca Process 47 of 64 on vn6.physics.ubc.ca Process 27 of 64 on vn4.physics.ubc.ca Process 43 of 64 on vn54.physics.ubc.ca Process 38 of 64 on vn5.physics.ubc.ca Process 40 of 64 on vn51.physics.ubc.ca Process 23 of 64 on vn36.physics.ubc.ca Process 50 of 64 on vn64.physics.ubc.ca Process 42 of 64 on vn53.physics.ubc.ca Process 19 of 64 on vn32.physics.ubc.ca Process 12 of 64 on vn26.physics.ubc.ca Process 53 of 64 on vn9.physics.ubc.ca Process 45 of 64 on vn56.physics.ubc.ca Process 39 of 64 on vn50.physics.ubc.ca Process 16 of 64 on vn3.physics.ubc.ca Process 41 of 64 on vn52.physics.ubc.ca Process 11 of 64 on vn25.physics.ubc.ca Process 51 of 64 on vn7.physics.ubc.ca Process 37 of 64 on vn49.physics.ubc.ca Process 31 of 64 on vn43.physics.ubc.ca Process 36 of 64 on vn48.physics.ubc.ca Process 49 of 64 on vn63.physics.ubc.ca Process 63 of 64 on vn59.physics.ubc.ca Process 26 of 64 on vn39.physics.ubc.ca Process 62 of 64 on vn61.physics.ubc.ca Process 21 of 64 on vn34.physics.ubc.ca Process 20 of 64 on vn33.physics.ubc.ca Process 14 of 64 on vn28.physics.ubc.ca Process 7 of 64 on vn20.physics.ubc.ca Process 22 of 64 on vn35.physics.ubc.ca Process 18 of 64 on vn31.physics.ubc.ca Process 15 of 64 on vn29.physics.ubc.ca Process 13 of 64 on vn27.physics.ubc.ca Process 25 of 64 on vn38.physics.ubc.ca Process 9 of 64 on vn22.physics.ubc.ca Process 6 of 64 on vn2.physics.ubc.ca Process 10 of 64 on vn23.physics.ubc.ca Process 32 of 64 on vn44.physics.ubc.ca Process 17 of 64 on vn30.physics.ubc.ca Process 4 of 64 on vn18.physics.ubc.ca Process 34 of 64 on vn46.physics.ubc.ca Process 35 of 64 on vn47.physics.ubc.ca Process 5 of 64 on vn19.physics.ubc.ca Process 8 of 64 on 
vn21.physics.ubc.ca Process 3 of 64 on vn17.physics.ubc.ca Process 33 of 64 on vn45.physics.ubc.ca Process 1 of 64 on vn10.physics.ubc.ca 1.410u 2.720s 0:28.25 14.6% 0+0k 0+0io 59425pf+0w 0.50user 0.20system 0:30.85elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (4358major+2201minor)pagefaults 0swaps ############################################################ Fri Jan 14 17:22:05 PST 2005 ############################################################ # Moved vnfe1:/home/matt # to vnfe4:/home/vnfe1-tmp/home/matt # OK ############################################################ Fri Jan 14 20:59:23 PST 2005 ############################################################ # Just Mona running now ... Summary of vn.physics.ubc.ca usage: Thu Jan 13 17:41:07 PST 2005 Top 128 processes idle 65 root 46 berciu 16 sbegin 1 /home/berciu # So move vnfe3:/home vnfe3:/home2 vnfe1:/home vnfe1:/home2 vnfe1:/home3 # As matt@head foreach v (vnfe3:/home vnfe3:/home2 vnfe1:/home vnfe1:/home2 vnfe1:/home3/home) vnCommand "umount $v" end vnCommand umount vnfe4:/home vnCommand df # A few vnfe1 mounts left, due to Mona's jobs # As root@vnfe4 mkdir /d/VNFE1 mkdir /d/VNFE1/home3 mkdir /d/VNFE3 mv /d/vnfe1/home /d/VNFE1 & mv /d/vnfe1/home2 /d/VNFE1 & mv /d/vnfe3/home /d/VNFE3 & mv /d/vnfe3/home2 /d/VNFE3 & mv /d/vnfe2/home /d/VNFE1/home3 & # OOPS, major f^&k-up since that moves the stuff onto the (tiny) / # partition. Interrupt jobs then verify Usage's on various partitions # relative to this AMs. vnfe1:/home # OK vnfe1:/home2 # OK vnfe1:/home3/home # OK vnfe3:/home # OK vnfe3:/home2 # OK # Now for the second attempt!
mkdir /home/VNFE1 mkdir /home/VNFE3 mkdir /home/VNFE3/home3 mv /d/vnfe1/home /home/VNFE1 & mv /d/vnfe1/home2 /home/VNFE1 & mv /d/vnfe2/home /home/VNFE1/home3 & mv /d/vnfe3/home /home/VNFE3 & mv /d/vnfe3/home2 /home/VNFE3 & # Also archive /etc, /var # As root@head cd /home/VNFE1 scp -r root@vnfe1:/etc cd /home/VNFE3 scp -r root@vnfe3:/etc # As root@vnfe1 cp -a /var /d/vnfe4/home/VNFE1 # As root@vnfe3 cp -a /var /d/vnfe4/home/VNFE3 # As root@vnfe1 cd /usr tar cf local.tar local ftp vnfe4 # Transfer local.tar to vnfe4:/home/VNFE1 # Unmount /d/vnfe[123]/* from bh, lnx machines ############################################################ Sat Jan 15 07:23:19 PST 2005 ############################################################ # vnfe1:{/home,/home2,/home3/home} and vnfe3:/home relocated # successfully. # As root@head cd /home/VNFE3 mv /d/vnfe3/home2 . vnallCommand 'mount /d/vnfe4/home' etc vi passwd matt:!:243:600:Matthew Choptuik:/d/vnfe4/home/vnfe1-tmp/home/matt:/bin/tcsh #----------------------------------------------------------------------- # vn24 and vn25 not responding! Hope this isn't a harbinger of # things to come! #----------------------------------------------------------------------- # Killing Mona's jobs foreach u (berciu) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end # OK ... as soon as the move of vnfe3:/home2 to vnfe4 is complete, we should # be ready to rock and roll, starting with the installation on vnfe1 # and vnfe3 Sat Jan 15 08:35:48 PST 2005 # Backgrounding the mv, killing the shell, continuing Usage to ensure # mv continues unimpeded. 3840792 suqin 1989180 wkb 4 lost+found 4 Usage [root@head home2]# usage 3940640 suqin 1989180 wkb 4 lost+found 4 Usage [root@head home2]# usage 4005004 suqin 1989180 wkb 4 lost+found 4 Usage # Off to the lab # but being REALLY paranoid, check mv status from 7100. 
############################################################ Sat Jan 15 09:35:25 PST 2005 ############################################################ # Problems with post-install script, sshd not in /etc/passwd # due to massive system confusion associated with matt having # root scripts etc # As root@vnfe4 ssh matt@bh0 post-install | bash # Disabling graphical boot # Upgrading vnfe1 # vnfe3:/home2 mv completed # Upgrading vnfe3 # Hacking on matt@bh0.physics.ubc.ca:/d/laplace/usr2/people/ftp/pub/vns/post-install-0-fe # As matt@bg0 cd .ssh vi known_hosts # Expunged vn, 142.103.237 entries Dist # As root@vnfe1 ssh matt@142.103.234.164 post-install-fe # Default route was incorrectly set # Fixed and reboot # As root@vnfe1 ssh matt@bh0 post-install-fe ssh matt@bh0 post-install-fe | bash # OK # As root@vnfe1 ls /d/vnfe4/home/VNFE1/{home,home2} cd /home mv /d/vnfe4/home/VNFE1/home/* . & cd /home2 mv /d/vnfe4/home/VNFE1/home2/* . & cd /home3 mv /d/vnfe4/home/VNFE1/home3/* . & # Suspending jobs temporarily to avoid load # /home2, /home3 partitions not made drakconf vi /etc/fstab # As root@vnfe3 diskdrake ssh matt@bh0 post-install-fe ssh matt@bh0 post-install-fe | bash # OK # As root@vnfe3 ls /d/vnfe4/home/VNFE3/{home,home2} cd /home mv /d/vnfe4/home/VNFE3/home/* . & cd /home2 mv /d/vnfe4/home/VNFE3/home2/* .
& vn1 # cp matt home from vnfe4 # Install not going well since uses fixed IP address # Starting with vn1 # Still struggling with drakautoinst, but vn1 is ready for secondary ssh root@vn1 ssh matt@bh0 post-install | bash # Have the assembly line going now !!ssh vnfe1 mw mk-sys-cfg :::::::::::::: /d/vnfe1/home/matt/scripts/mk-sys-cfg :::::::::::::: #!/bin/sh -x P=`basename $0` F=syslinux.cfg case $# in 1) n=$1;; *) echo "usage: $P "; exit 1; esac cat>$F< /tmp/kh; grep -v '^142.103.237' /tmp/kh > known_hosts # Unfortunately, although we have a pipeline going, manual intervention is still required since apparently # IP address is not only controlled by syslinux.cfg # ntpd not installed/running foreach n (vn1 vn2 vn4 vn5 vn6 vn7 vn8 vn9 vn10 vn11 vn12 vn13 vn15 vn24 vn25 vn26) ssh root@${n} 'cd /d/vnfe4/home/Mandrake101/cd1/media/main; rpm -ivh ntp-4.2.0-9mdk.i586.rpm ntp-client-4.2.0-9mdk.i586.rpm' end ############################################################ Sat Jan 15 13:29:37 EST 2005 ############################################################ # Restore /usr/local on vnfe[13] # Transfer local.tar to vnfe4:/home/VNFE1 # As root@vnfe1 cd /usr mv local local.O tar xf /d/vnfe4/home/VNFE1/local.tar # As root@vnfe1 cd /usr mv local local.O tar xf /d/vnfe4/home/VNFE1/local.tar # As root@vnfe3 cd /home nice +19 mv /d/vnfe4/home/VNFE3/home/* . & cd /home2 nice +19 mv /d/vnfe4/home/VNFE3/home2/* . 
& # TIME ZONE timeconfig --utc PST8PDT foreach n (vnfe3 vn1 vn2 vn4 vn5 vn6 vn7 vn8 vn9 vn10 vn11 vn12 vn13 vn15 vn24 vn25 vn26) ssh root@$n 'timeconfig --utc PST8PDT; vnSetdate' end ############################################################ Sat Jan 15 18:41:34 PST 2005 ############################################################ # PGI compilers 5.2-2 # As root@{vnfe1,vnfe3} mkdir -p /d/bh0/home vi /etc/fstab bh0:/home /d/bh0/home nfs rw,bg,hard,intr 0 0 mv /usr/local/pgi /usr/local/pgi.O mount -a cd /d/bh0/home/matt/system/C2/pgi-cdk-5.2-2 installcdk # install in /usr/local/pgi cat>/usr/local/pgi/license.dat</usr/local/pgi/linux86/5.2/bin/localrc<> /etc/ld.so.conf< 698580 wkb 348592 xiao 142716 yfan 53720 ytwang 6012 zheqiong 4 Usage.O 4 Usage # Move xiao and yfan to /d/vnfe2/home # As root@head ls /home/VNFE3/home/{xiao,yfan} mv /home/VNFE3/home/{xiao,yfan} /d/vnfe2/home # As root@vnfe3 mv /home/wkb /home2/wkb-0 ls /home/VNFE3/home/{wkb,ytwang,zheqiong} mv /home/VNFE3/home/{wkb,ytwang,zheqiong} /d/vnfe2/home # USER accounts to change # wkb /d/vnfe2/home/wkb # xiao /d/vnfe2/home/xiao/ # yfan /d/vnfe2/home/yfan/ # ytwang /d/vnfe2/home/ytwang/ # zheqiong /d/vnfe2/home/zheqiong/ # All but following nodes are up vn6, vn20, vn25, vn52 # Updated web page, motd, and called it a night! 
Sun Jan 16 02:38:55 PST 2005 ############################################################ Sun Jan 16 09:00:41 PST 2005 ############################################################ # Get MPI going, possibly enabling rsh among front-ends # and nodes # Had to fix up root's crontab # As root@vnfe1 # rsh-0.17-13mdk installed vi /etc/xinetd.d/rlogin disable = no killall -HUP xinetd # Make /etc/hosts.equiv file cd /etc touch hosts.equiv && bu hosts.equiv echo "vnfe1.physics.ubc.ca" > hosts.equiv echo "vnfe3.physics.ubc.ca" >> hosts.equiv foreach i (`iota 64`) echo "vn${i}.physics.ubc.ca" >> hosts.equiv end # Looks as if 'rsh' is still enabled by default, can't # get rlogin working immediately, but will delay, # need to recover /usr/local on the nodes vnCommand 'ls -ltd /usr/local' vnCommand 'mv /usr/local /usr/local.O' # First, distribute a few copies of local.tar # As root@bh0 cd /home; cp /d/vnfe4/home/VNFE1/local.tar . # As root@vnfe1 cd /home; cp /d/vnfe4/home/VNFE1/local.tar . # As root@vnfe3 cd /home2; cp /d/vnfe4/home/VNFE1/local.tar . 
# Coded untar-local # Add bh0 mount to etc vi fstab bh0:/home /d/bh0/home nfs rw,bg,hard,intr 0 0 vnallbgCommand 'mkdir -p /d/bh0/home' vnDistEtc fstab vnallbgCommand 'mount -a' foreach n (`iota 64`) echo "vn${n}.physics.ubc.ca(async,rw,no_root_squash) \\" >> /tmp/ex end foreach h (142.103.237.1 142.103.237.2 142.103.237.3 142.103.237.4 142.103.237.5 142.103.237.7 142.103.237.8 142.103.237.9 142.103.237.10 142.103.237.11 142.103.237.12 142.103.237.13 142.103.237.14 142.103.237.15 142.103.237.16 142.103.237.17) ssh root@${h} untar-local /d/vnfe4/home/VNFE1/ & end foreach h ( 142.103.237.18 142.103.237.19 142.103.237.21 142.103.237.22 142.103.237.23 142.103.237.24 142.103.237.26 142.103.237.27 142.103.237.28 142.103.237.29 142.103.237.30 142.103.237.31 142.103.237.32 142.103.237.33 142.103.237.34 142.103.237.35) ssh root@${h} untar-local /d/vnfe1/home & end foreach h ( 142.103.237.36 142.103.237.37 142.103.237.38 142.103.237.39 142.103.237.40 142.103.237.41 142.103.237.42 142.103.237.43 142.103.237.44 142.103.237.45 142.103.237.46 142.103.237.47 142.103.237.48 142.103.237.49 142.103.237.50 142.103.237.51) ssh root@${h} untar-local /d/vnfe3/home2 & end foreach h (142.103.237.53 142.103.237.54 142.103.237.55 142.103.237.56 142.103.237.57 142.103.237.58 142.103.237.59 142.103.237.60 142.103.237.61 142.103.237.62 142.103.237.63 142.103.237.64) ssh root@${h} untar-local /d/bh0/home & end # Expect ... 
du -hs /usr/local 548M /usr/local # OK # Basic MPI test vnMpptest vn1 vn2 Model complexity is (0.000000e+00 + n * 9.129054e-08) # startup = 0.00 usec and transfer rate = 10.95 Mbytes/sec # Variance in fit = 0.020287 (smaller is better) plot 'mppout.gpl' using 4:5 with lines,\ 0.000000+0.091291*x with dots pause -1 "Press to continue" # OK vnallCommand 'cd /d/vnfe4/home/Mandrake101/CD1/10.1/i586/media/main; rpm -ivh *finger*' # CVS fix to get rid of # cvs [checkout aborted]: Cannot check out files into the repository itself # error # As idle@vnfe1 vi ~/.cshrc setenv TMPDIR /var/tmp # OK # PAMR test # As matt@vnfe1 cda cvs co pamr cd pamr source ~matt/scripts/soPGI-mpich configure --prefix=`pwd` make # Copy /usr/local/pgi to all nodes vnCommand 'mv /usr/local/pgi /usr/local/pgi.O' # As root@vnfe1 tar cf /d/vnfe1/home/pgi.tar /usr/local/pgi vnNbgCommand 'cd /usr/local; tar xf /d/vnfe1/home/pgi.tar' vnCommand "test -d /usr/local/usr/local && cd /usr/local/usr/local; ls -ltd /usr/local/pgi || echo '/usr/local/pgi does not yet exist'" vnCommand "test -d /usr/local/usr/local && cd /usr/local/usr/local; ls -ltd /usr/local/pgi || mv pgi /usr/local; ls -ltd /usr/local/pgi" # OK ... try not to do that again!! ssh root@vnfe4 'cd /usr/local; tar cf pgi.tar pgi; mv pgi.tar /d/vnfe4/home/' # Code MPI tester vnTestPgi # As matt@vnfe1 cds new sh-script mv sh-script vnTestPGI vnallCommand vnTestPGI # pgf77 now found on nodes, csh.cshrc and/or /root/.cshrc # needs updating # As matt@vnfe1 etc make import vi csh.cshrc # Sure enough, path was munged setenv PGI /usr/local/pgi setenv INTEL /opt/intel if ($?PATH) then setenv PATH "${INTEL}/bin:${PGI}/linux86/5.2/bin:${PATH}:/usr/X11R6/bin" else setenv PATH "${INTEL}/bin:${PGI}/linux86/5.2/bin:/bin:/usr/bin:/usr/local/bin:/usr/X11R6/bin" endif vnallCommand vnTestPGI # ...
AND root's .cshrc needs modification # As matt@vnfe1 etc scp root@vnfe1:/root/.cshrc root.cshrc vnallbgCommand 'cd /root; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/root.cshrc .cshrc' # Looks like it could be problematic, vis a vis overloading sshd? vnallCommand 'cd /root; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/root.cshrc .cshrc' vnallCommand vnTestPGI # OK? # "Distributing" INTEL compiler as well # As root@vnfe1 cd /opt tar cf intel.tar intel mmkdir v intel.tar /d/vnfe4/home vnCommand 'mkdir -p /opt' vnCommand 'cd /opt; tar xf /d/vnfe4/home/intel.tar' # As matt@vnfe1 etc scp root@vnfe1:/etc/ld.so.conf . vi ld.so.conf /usr/local/pgi/linux86/5.2/lib /usr/local/pgi/linux86/5.2/liblf vnDistEtc ld.so.conf vnallbgCommand ldconfig vnallCommand 'vnTestPgi; vnTestINTEL' | tee /tmp/compile # OK # using "local" version of ssh, probably simply have to # remove old version, since things work without /usr/local vnallCommand 'ls /usr/local/{bin,sbin,lib,include}/*ssh*' vnallCommand 'RM -r /usr/local/{bin,sbin,lib,include}/*ssh*' vnallCommand 'ls /usr/local/{bin,sbin,lib,include}/*scp*' vnallCommand 'RM -r /usr/local/{bin,sbin,lib,include}/*scp*' vnallCommand 'ls /usr/local/{bin,sbin,lib,include}/*ssh*' vnallCommand 'which sshd; which ssh' vnallCommand 'ls /usr/local/{bin,sbin,lib,include}/*scp*' vnallCommand 'which scp' # OK # TODO: # re-compile mpich # # PGI # INTEL # # Have most recently built on Jeremy's machine, jmd.physics.ubc.ca # matt@bh0:~/system/JMD/README cda cd mpich-1.2.6 cds cp vnMPImakePG vnMPImakePG-6.1 scp matt@jmd.physics.ubc.ca:/home/matt/scripts/c2MPImakePG vnMPImakePG # Hacked on vnMPImakePG vi vnMPImakePG # but disable install, and test on vn1 # As root@vn1 cd /usr/local; cp -r PGI PGI-sav vnMPImakePG cd /var/tmp/install/mpich-1.2.6 make install ############################################################ Sun Jan 16 15:32:30 PST 2005 
############################################################ # Mail ? vnallCommand 'echo `hostname` | Mail -s "test" choptuik@physics.ubc.ca' # Need to update /etc/groups # As matt@vnfe1 etc scp root@vnfe1:/etc/group . vnDistEtc group vnallCommand 'service postfix start' # Doesn't work # Mail can wait, MPI is first #----------------------------------------------------------------------- Mon Jan 17 14:30:32 PST 2005 #----------------------------------------------------------------------- # But now seems OK vnNbgCommand 'service postfix start' # As root@vn1 cdi cd mpich-1.2.6/ # TODO make install scp matt@jmd.physics.ubc.ca:/home/matt/autoconf/Installz.mpich.pgi . ln -s Installz.mpich.pgi P vi P # old versions of mpi lurk in /usr/local # Remove only from vn1 to begin with # As root@vn1 ls /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]* /usr/local/bin/mpicc* /usr/local/bin/mpirun.ch_p4* /usr/local/bin/mpiCC* /usr/local/bin/mpirun.pg* /usr/local/bin/mpif77* /usr/local/include/mpidefs.h /usr/local/bin/mpif90* /usr/local/include/mpi_errno.h /usr/local/bin/mpiman* /usr/local/include/mpif.f90 /usr/local/bin/mpireconfig* /usr/local/include/mpif.h /usr/local/bin/mpirun* /usr/local/include/mpi.h /usr/local/bin/mpirun.args* /usr/local/lib/libmpi.a@ /bin/rm -rf `!!` # Also old /usr/local/PGI /bin/rm -rf /usr/local/PGI # Now /usr/local/PGI/bin, /usr/local/INTEL/bin must be in path # As matt@vnfe1 etc Arc csh.cshrc vi csh.cshrc setenv PGI /usr/local/pgi setenv INTEL /opt/intel setenv PGIBIN /usr/local/PGI/bin setenv INTELBIN /usr/local/INTEL/bin if ($?PATH) then setenv PATH "${PGIBIN}:${INTELBIN}:${INTEL}/bin:${PGI}/linux86/5.2/bin:${PATH}:/usr/X11R6/bin" else setenv PATH "${PGIBIN}:${INTELBIN}:${INTEL}/bin:${PGI}/linux86/5.2/bin:/bin:/usr/bin:/usr/local/bin:/usr/X11R6/bin" endif vnDistEtc csh.cshrc # To modify {matt,root} # as matt@vnfe1 vics set path=(. .. ../.. ../../.. 
../bin $HOMEMWC/bin $HOMEMWC/scripts /usr/sbin /sbin $path) # as root@vnfe1, first rationalize vi with matt cd /root cp ~matt/.exrc . vnallbgCommand 'scp -q root@vnfe1.physics.ubc.ca:/root/.exrc .' sleep 5 vnallCommand 'cat .exrc' # OK vics set path=(. $HOMEMWC/scripts $path) alias vi '/usr/bin/vi' which mpirun /usr/local/PGI/bin/mpirun # OK vnallbgCommand 'scp -q root@vnfe1.physics.ubc.ca:/root/.cshrc .' sleep 5 vnallCommand 'cat .cshrc' # OK, back to testing and installation of PGI/INTEL mpi versions # As root@vn1 cdi cd mpich-1.2.6/ make install # prefix is /root?? # Yup, missed the HOME -> PREFIX resulting from sloppy earlier # programming # so rinse, and repeat # First make vn2 symmetric, by trashing all of its existing mpi files # As root@vn2 ls /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]* /usr/local/bin/mpicc* /usr/local/bin/mpirun.ch_p4* /usr/local/bin/mpiCC* /usr/local/bin/mpirun.pg* /usr/local/bin/mpif77* /usr/local/include/mpidefs.h /usr/local/bin/mpif90* /usr/local/include/mpi_errno.h /usr/local/bin/mpiman* /usr/local/include/mpif.f90 /usr/local/bin/mpireconfig* /usr/local/include/mpif.h /usr/local/bin/mpirun* /usr/local/include/mpi.h /usr/local/bin/mpirun.args* /usr/local/lib/libmpi.a@ /bin/rm -rf `!!` set N=vn2; ssh root@${N} 'ls /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]*' set N=vn2; ssh root@${N} '/bin/rm -rf `ls /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]*`' set N=vn2; ssh root@${N} 'ls /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]*' set N=vn2; ssh root@${N} '/bin/rm -rf /usr/local/lib/libmpi.a' set N=vn2; ssh root@${N} 'ls -lt /usr/local/lib/libmpi.a' # As root@{vn1,vn2} vnMPImakePG cdi cd mpich-1.2.6/ make install # vn2 install very slow # /bin/rm -rf /usr/local/PGI # see above, need to get rid of all old versions # Side=-by-side set C=vnMPImakePG echo "`date`: Start $C" >> /tmp/syslog time $C | tee -a /tmp/syslog echo "`date`: End $C" >> /tmp/syslog cd mpich-1.2.6/ make install # PGI looks superficially OK? 
ssh matt@vn1 ls -lt `which mpirun` # As matt@vn1 cds vi vnMpptest-PG cd /d/vnfe1/home/matt/demo/perftest make clean popt-mpich make mpptest # As matt@vnfe1 vnMpptest-PG vn1 vn2 # Looks good. # OK, clean out all nodes, and do all vnNbgCommand 'RM -r /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]*' sleep 5 vnCommand 'ls -ltd /usr/local/{man,include,lib,bin,sbin}/*[mM][pP][iI]*' vnNbgCommand 'RM -r /usr/local/PGI' sleep 5 vnCommand 'ls -ltd /usr/local/PGI' vnNbgCommand 'vnMPImakePG' vnCommand 'cd /d/vnfe1/home/matt/demo/perftest; make clean; popt-mpich; make mpptest' # OK # Should be ready to run cdex popt-mpich make clean; make cpi time Mpirun-notop 8 cpi # OK foreach n (8 16 24 32 40 48 56 60) time Mpirun-notop $n cpi | tee -a /tmp/cpi-test end 0.292u 0.528s 0:03.45 23.4% 0+0k 0+0io 0pf+0w 0.490u 0.950s 0:06.97 20.6% 0+0k 0+0io 0pf+0w 0.683u 1.367s 0:10.09 20.2% 0+0k 0+0io 0pf+0w 0.897u 1.783s 0:13.13 20.3% 0+0k 0+0io 0pf+0w 1.075u 2.271s 0:15.89 21.0% 0+0k 0+0io 0pf+0w 1.313u 2.664s 0:19.83 20.0% 0+0k 0+0io 0pf+0w 1.535u 3.062s 0:21.43 21.4% 0+0k 0+0io 0pf+0w 1.585u 3.377s 0:22.04 22.4% 0+0k 0+0io 0pf+0w # Following on-line PAMR instructions # As matt@vn60 cda RM -r pamr cvs co pamr cd pamr source ~matt/scripts/soPGI-mpich configure --prefix=`pwd` make cd examples/wave cp ~matt/templates/pamr-wave.sh . pamr-wave.sh 9 # OK 18.896u 4.947s 0:46.90 50.8% 0+0k 0+0io 3pf+0w foreach n (2 4 8 9 10 16 20 25 30 36 40 45 50 60) time pamr-wave.sh $n | tee -a /tmp/pamr-wave-test end !!grep 0pf /tmp/pamr-wave-test 2: 50.364u 4.980s 1:13.73 75.0% 0+0k 0+0io 0pf+0w 4: 28.434u 4.377s 0:50.58 64.8% 0+0k 0+0io 0pf+0w i8: 19.283u 4.791s 0:47.29 50.8% 0+0k 0+0io 0pf+0w 10 0.279u 0.539s 0:00.95 84.2% 0+0k 0+0io 0pf+0w ? 16: 19.364u 4.534s 0:47.88 49.8% 0+0k 0+0io 0pf+0w 20: 0.407u 0.878s 0:01.45 87.5% 0+0k 0+0io 0pf+0w ? 
25: 26.152u 5.773s 0:55.42 57.5% 0+0k 0+0io 0pf+0w 30: 32.007u 6.343s 1:17.90 49.2% 0+0k 0+0io 0pf+0w 40.567u 6.992s 1:21.08 58.6% 0+0k 0+0io 0pf+0w 51.636u 7.640s 1:45.05 56.4% 0+0k 0+0io 0pf+0w 59.370u 8.776s 1:51.01 61.3% 0+0k 0+0io 0pf+0w 50.364u 4.980s 1:13.73 75.0% 0+0k 0+0io 0pf+0w 28.434u 4.377s 0:50.58 64.8% 0+0k 0+0io 0pf+0w 19.283u 4.791s 0:47.29 50.8% 0+0k 0+0io 0pf+0w 0.279u 0.539s 0:00.95 84.2% 0+0k 0+0io 0pf+0w 19.364u 4.534s 0:47.88 49.8% 0+0k 0+0io 0pf+0w 0.407u 0.878s 0:01.45 87.5% 0+0k 0+0io 0pf+0w 26.152u 5.773s 0:55.42 57.5% 0+0k 0+0io 0pf+0w 32.007u 6.343s 1:17.90 49.2% 0+0k 0+0io 0pf+0w 40.567u 6.992s 1:21.08 58.6% 0+0k 0+0io 0pf+0w 51.636u 7.640s 1:45.05 56.4% 0+0k 0+0io 0pf+0w 59.370u 8.776s 1:51.01 61.3% 0+0k 0+0io 0pf+0w 71.235u 9.087s 2:06.89 63.2% 0+0k 0+0io 0pf+0w 82.850u 10.035s 2:25.60 63.7% 0+0k 0+0io 0pf+0w 109.861u 11.399s 2:53.45 69.9% 0+0k 0+0io 0pf+0w #----------------------------------------------------------------------- # TODO # Check VN pages into CVS so that they can be modified #----------------------------------------------------------------------- # INTEL version of mpich # !!! continues at Mon Jan 17 12:05:00 PST 2005 ######################################################################## Mon Jan 17 10:23:35 PST 2005 ######################################################################## # Somehow had managed to mung /etc/fstab on vnfe[13] so that they # were using ext2 mounts, used boot CD in rescue mode and all seems # OK now. 
# Need to move some folk from vnfe1:/home to vnfe1:/home2 # As root@vnfe1 cd /home /bin/rm local.tar pgi.tar foreach u (akeshet berciu mcliu joos pineda tupitsyn tzenova nodwell dvernon) test -d /home2/$u && echo "/home2/$u already exists" end foreach u (akeshet berciu mcliu joos pineda tupitsyn tzenova nodwell dvernon) test -d /home2/$u || (cd /home; echo "Executing 'mv $u /home2'"; mv $u /home2;) end # As matt@vnfe1 etc vi passwd # foreach u (/akeshet /berciu /mcliu /joos /pineda /tupitsyn /tzenova /nodwell /dvernon /d/vnfe1/home -> /d/vnfe1/home2 vnDistEtc passwd # ssh root@vn23 vnNbgCommand 'umount -l /d/vnfe1/home /d/vnfe1/home2 /d/vnfe2/home3; mount -a; df' foreach u (akeshet berciu mcliu joos pineda tupitsyn tzenova nodwell dvernon) cd ~$u; pwd end # As root@vnfe1 foreach u (akeshet berciu mcliu joos pineda tupitsyn tzenova nodwell dvernon) cd /home; ln -s /d/vnfe2/home/$u .; ls -ltd $u; cat /home/$u/.forward end # As matt@bh0 etc scp root@vnfe1:/etc/fstab fstab.vnfe1 scp root@vnfe3:/etc/fstab fstab.vnfe3 scp root@vn1:/etc/fstab fstab.node # No, /etc/fstab still f&*^ed up on vnfe1, amazingly fstab~ isn't! cd /etc Arc fstab [root@vnfe1]# diff fstab~ fstab 4,5d3 < /dev/sdb1 /home2 ext3 defaults 1 2 < /dev/sdc1 /home3 ext3 defaults 1 2 10,13c8,13 < vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 < vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 < vnfe4:/home /d/vnfe4/home nfs rw,bg,hard,intr 0 0 < bh0:/home /d/bh0/home nfs rw,bg,hard,intr 0 0 --- > /dev/sdc2 swap swap defaults 0 0 > /dev/sdb1 /home2 ext3 defaults 1 2 > vnfe3:/home /d/vnfe3/home nfs rw,bg,hard,intr 0 0 > vnfe3:/home2 /d/vnfe3/home2 nfs rw,bg,hard,intr 0 0 > vnfe4:/home /d/vnfe4/home nfs rw,bg,hard,intr 0 0 > bh0:/home /d/bh0/home nfs rw,bg,hard,intr 0 0 cp fstab~ fstab umount -a; mount -a # Add 8K rw option to all /etc/fstab cd /etc Arc fstab vi fstab /bg cw bg,rsize=8192,wsize=8192 n . 
vnNbgCommand 'cd /etc; Arc fstab; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab; umount -a -t nfs -l; mount -a; df' vnallCommand 'df' # Need to add vnfe2:/home2, and link to vnfe2:/home vnallCommand 'mkdir -p /d/vnfe2; cd /d/vnfe2; /bin/rm -rf home2; ln -s /d/vnfe4/home/vnfe2-tmp/home2 .; ls home2; cd home2; ls' etc vi fstab vnallCommand 'cd /d/vnfe2/home; ls; echo; cd /d/vnfe2/home2; ls; echo; echo' # OK ######################################################################## Mon Jan 17 15:33:09 PST 2005 ######################################################################## # pine/pico # As root@vnfe1 rpm -ivh /d/vnfe4/home/matt/RPMS/pine-4.61-1.i386.rpm error: Failed dependencies: libcrypto.so.4 is needed by pine-4.61-1 libssl.so.4 is needed by pine-4.61-1 vnallbgCommand 'rpm -ivh --nodeps /d/vnfe4/home/matt/RPMS/pine-4.61-1.i386.rpm' ######################################################################## Mon Jan 17 12:05:00 PST 2005 ######################################################################## # As root@vn1 vnMPImakeINTEL # As root@vn2 vnMPImakeINTEL icc -I. -I/var/tmp/install/mpich-1.2.6/src/fortran/src -I../include -I/var/tmp/install/mpich-1.2.6/src/fortran/include -I.. 
-I/var/tmp/install/mpich-1.2.6/include -I/var/tmp/install/mpich-1.2.6/include -I/var/tmp/install/mpich-1.2.6/mpid/ch_p4 -I/var/tmp/install/mpich-1.2.6/mpid/util -DLINUX -O3 -tpp6 -axK -DUSE_SOCKLEN_T -DUSE_U_INT_FOR_XDR -DHAVE_MPICHCONF_H -c abortf.c abortf.c(20): error: identifier "mpi_abort_" is undefined #pragma weak mpi_abort_ = pmpi_abort_ ^ compilation aborted for abortf.c (code 2) make[3]: *** [abortf.o] Error 2 make[2]: *** [flibs] Error 2 make[1]: *** [mpi-modules] Error 1 make: *** [mpi] Error 2 + false cd /var/tmp/install/mpich-1.2.6/src/fortran/src bu abortf.c vi abortf.c #else /* #pragma weak mpi_abort_ = pmpi_abort_ */ void mpi_abort_ ( MPI_Fint *comm, MPI_Fint *errorcode, MPI_Fint *__ierr ); #endif bu addressf.c vi addressf.c /* #pragma weak mpi_address_ = pmpi_address_ */ void mpi_address_ ( void *, MPI_Fint *, MPI_Fint * ); #endif bu allgatherf.c vi allgatherf.c # Intel code may need to be de-pragma-ed # As matt@bh0 cds vi de-pragma # Update distribution directory cda tar zxf mpich-1.2.6.tar.gz cd mpich-1.2.6/src/fortran/src de-pragma *.c mkdir With-pragma mv *.c.O With-pragma cda mv mpich-1.2.6.tar.gz mpich-1.2.6.tar.gz.O tz mpich-1.2.6 # Implemented 'make push' in matt@bh0:~/autoconf to ensure that # mpich-....tar.gz etc. get pushed out to other systems since # ??MPImake?? scripts generically use LOCAL tarball! 
make push # As root@vn[12] vnMPImakeINTEL cdi; cd mpich-1.2.6 make install # TODO C apps build, but not F77 #----------------------------------------------------------------------- # Nope, but from web, configuring with --disable-weak-symbols # should work # Modified vnMPImakeINTEL and remade on vn1 # As root@vn1 vnMPImakeINTEL # As matt@vn1 cdex iopt make clean make cpi Mpirun 4 cpi # OK make fpi Mpirun 4 fpi #----------------------------------------------------------------------- # Install Intel MPI on all vnallbgCommand 'vnMPImakeINTEL' # As matt@vn34 cd /d/vnfe1/home/matt/demo/perftest make clean iopt-mpich make mpptest cds cp vnMpptest-PG vnMpptest-INTEL vi vnMpptest-INTEL vnMpptest-INTEL vn35 vn36 # startup = 0.00 usec and transfer rate = 10.92 Mbytes/sec # Variance in fit = 0.020351 (smaller is better) plot 'mppout.gpl' using 4:5 with lines,\ 0.000000+0.091580*x with dots pause -1 "Press to continue" clear # Looks good. # OK # As matt@vn1 cdex make clean iopt-mpich make cpi test -f /tmp/cpi-test && /bin/rm /tmp/cpi-test foreach n (8 16 24 32 40 48 56 60) time Mpirun-notop $n cpi | tee -a /tmp/cpi-test end 1.140u 1.173s 0:07.81 29.5% 0+0k 0+0io 0pf+0w 1.680u 1.759s 0:11.71 29.2% 0+0k 0+0io 0pf+0w 2.195u 2.406s 0:15.47 29.6% 0+0k 0+0io 0pf+0w 2.722u 3.075s 0:19.69 29.4% 0+0k 0+0io 0pf+0w 3.306u 3.751s 0:24.40 28.8% 0+0k 0+0io 0pf+0w 3.932u 4.356s 0:27.66 29.9% 0+0k 0+0io 0pf+0w 4.157u 4.840s 0:29.55 30.4% 0+0k 0+0io 0pf+0w # Seems significantly slower that PGI # PAMR # As matt@vn1 cda RM -r pamr cvs co pamr cd pamr iopt-mpich configure --prefix=`pwd` make cd examples/wave cp ~matt/templates/pamr-wave.sh . 
pamr-wave.sh 9 # OK 17.165u 4.816s 0:42.18 52.0% 0+0k 0+0io 17pf+0w # vs PGI # 18.896u 4.947s 0:46.90 50.8% 0+0k 0+0io 3pf+0w foreach n (2 4 8 9 10 16 20 25 30 36 40 45 50 60) time pamr-wave.sh $n | tee -a /tmp/pamr-wave-test end 39.374u 4.991s 1:04.37 68.9% 0+0k 0+0io 0pf+0w 23.800u 4.623s 0:45.86 61.9% 0+0k 0+0io 18pf+0w 17.422u 4.765s 0:39.94 55.5% 0+0k 0+0io 1pf+0w 17.128u 4.758s 0:40.17 54.4% 0+0k 0+0io 0pf+0w 17.968u 4.545s 0:41.47 54.2% 0+0k 0+0io 0pf+0w 21.561u 5.832s 0:46.50 58.9% 0+0k 0+0io 0pf+0w 25.614u 5.680s 0:54.51 57.4% 0+0k 0+0io 0pf+0w 32.143u 6.093s 1:04.46 59.3% 0+0k 0+0io 0pf+0w 40.618u 6.923s 1:13.45 64.7% 0+0k 0+0io 0pf+0w 51.817u 7.514s 1:29.93 65.9% 0+0k 0+0io 1pf+0w 59.776u 8.385s 1:45.04 64.8% 0+0k 0+0io 1pf+0w 71.261u 9.100s 2:03.85 64.8% 0+0k 0+0io 0pf+0w 83.933u 10.081s 2:20.29 67.0% 0+0k 0+0io 0pf+0w 109.678u 11.722s 3:05.47 65.4% 0+0k 0+0io 1pf+0w # Also seems somewhat faster than MPI ######################################################################## Mon Jan 17 16:46:55 PST 2005 ######################################################################## #----------------------------------------------------------------------- # Rtop issue is path (bash doesn't pick up $HOMEMWC/scripts #----------------------------------------------------------------------- # As idle@vnfe1 crontab -e # Format of lines: #min hour daymo month daywk cmd 0-55/5 * * * * /d/vnfe1/home/matt/scripts/Rtop cd vi .bashrc # .bashrc HOMEMWC="/d/vnfe1/home/matt" PATH="$HOMEMWC/bin:$HOMEMWC/scripts:$PATH" ENV=$HOME/.bashrc USERNAME="root" export HOMEMWC USERNAME ENV PATH # Doesn't work ... 
best simply to hardcode appropriate path ######################################################################## Mon Jan 17 16:58:09 PST 2005 ######################################################################## # /var/tmp now on, remount /home as /scratch on all nodes vnNbgCommand 'cd /etc; Arc fstab; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab; mount -a' vnCommand 'cd /etc; Arc fstab; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab; mount -a; df; sleep 1' vnNbgCommand 'chmod 777 /scratch; chmod +t /scratch; ls -ltd /scratch' etc vi csh.cshrc vi bashrc vnDistEtc csh.cshrc profile ######################################################################## Thu Jan 20 14:01:17 PST 2005 ######################################################################## # xterm missing # Adding vnfe1:/usr/local # Adding vnfe1:/opt # # to vnfe1:/etc/exports # and vnfe3's and node /etc/fstab vnallbgCommand 'mkdir -p /d/vnfe1/usr/local; mkdir -p /d/vnfe1/opt' # As root@vnfe1 cd /d/vnfe1 rmdir opt cd usr rmdir local ln -s /usr/local . cd .. ln -s /var . 
# etc make import vi fstab.node fstab.vnfe1 fstab.vnfe3 scp fstab.vnfe1 root@vnfe1:/etc/fstab scp fstab.vnfe3 root@vnfe3:/etc/fstab vnNbgCommand 'mkdir -p /d/vnfe1/opt; mkidr -p /d/vnfe1/usr/local' vnallbgCommand 'cd /etc; Arc fstab; CP /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab' vnCommand 'mount -a; df' # xterm vnallbgCommand 'cd /d/vnfe4/home/Mandrake101/CD1/10.1/i586/media/main; rpm -ivh *xterm*' ######################################################################## Thu Jan 20 22:51:02 PST 2005 ######################################################################## # Following the upgrade, the following nodes were still down # # vn6 P/S # vn20 Recalcitrant to net update # vn25 P/S # vn52 P/S # Three P/S's received, vn6, vn20 and vn52 all back, vn25 to shop # Ben had reported that vnfe1:/home3 was lost, but this does not # appear to be the case. # Have asked Pal to restore vnfe1:/home3, including those userids (wkb et al) # who were recently relocated there # Secondary install on vn6, vn20, vn52 # As root@vn6 ssh bh0 post-install | csh # Some hacking ... OK? # As root@vn20 OK? # As root@sh bh0 post-install | csh ssh mattbh0 post-install | csh OK? ######################################################################## Thu Jan 20 23:27:04 PST 2005 ######################################################################## # Need to update # root@{vnfe1,vnfe3,vn1}:/etc/ntp.conf # As root@{vnfe1,vnfe3,vn1} cd /etc bu ntp.conf scp -q root@vnfe4.physics.ubc.ca:/home/VNFE1/etc/ntp.conf . ######################################################################## Fri Jan 21 09:53:39 PST 2005 ######################################################################## # Message to Maggie asking her to offload data from vnfe3:/home, # /d/vnfe3/home # TODO: Coop/WG project. Implement interface to SFU HSM.
Mon Jan 24 17:08:59 PST 2005 Maggie again complained about problems with emacs from vnfe3, but this may have been due to the botched /scratch configuration ######################################################################## Mon Jan 24 17:09:32 PST 2005 ######################################################################## # xv ssh bh0 which xv vnallCommand 'ls -ltd /usr/X11R6/bin/; ls -l /usr/X11R6/bin/xv' vnallCommand 'cd /usr/X11R6/bin/; scp bh0:/usr/X11R6/bin/xv .; ls -lt xv' ######################################################################## Tue Jan 25 19:13:45 PST 2005 ######################################################################## # Ingrid reports that # 1) automount configuration not recovered # 2) 'bc' is missing # Automount # As matt@vnfe1 etc Arc auto.* scp -q root@vnfe4.physics.ubc.ca:/home/VNFE1/etc/auto.master . scp -q root@vnfe4.physics.ubc.ca:/home/VNFE1/etc/auto.misc . vnDistEtc auto.master vnDistEtc auto.misc vnallbgCommand '/etc/rc.d/init.d/autofs start' vnallbgCommand '/sbin/chkconfig autofs on' vnallCommand 'cd /misc/ASTRO; ls' vnallCommand 'cd /misc/pulsar1; ls' vnallCommand 'cd /misc/pulsar2; ls' # bc vnallbgCommand 'cd /d/vnfe4/home/Mandrake101/CD1/10.1/i586/media/main; rpm -ivh bc*' vnCommand 'which bc' # Fix up secondary install script vpi # OK ######################################################################## Wed Jan 26 11:17:32 PST 2005 ######################################################################## # Running pamr/wave as idle@vnfe1 to test out current parallel # configuration # Should provide all users with a .rhosts file # /etc/csh.cshrc etc make import scp root@vnfe1:/etc/csh.cshrc csh.cshrc.vnfe1 diff csh.cshrc.vnfe1 csh.cshrc # OK vi csh.cshrc #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test -f $HOME/.rhosts || mkvnrhosts > $HOME/.rhosts #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ vnDistEtc csh.cshrc ssh idle@vnfe1 # OK, ~/.rhosts created, remove, set shell to bash vi profile
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test -f $HOME/.rhosts || mkvnrhosts > $HOME/.rhosts #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ vnDistEtc profile Wed Jan 26 16:33:52 PST 2005 # As idle@vnfe1 poptp cd pamr/examples/wave; make clean; make ######################################################################## Thu Jan 27 11:13:31 PST 2005 ######################################################################## # ntp server not installed on vnfe3 [root@vnfe1]# rpm -qa | grep ntp chkfontpath-1.9.10-1mdk ntp-client-4.2.0 ntp-4.2.0-9mdk # As root@vnfe3 cd /d/vnfe4/home/Mandrake101/CD1/10.1/i586/media/main rpm -ivh ntp* chkconfig ntpd on ######################################################################## Fri Jan 28 09:41:29 PST 2005 ######################################################################## # Getting Roland's environment sane on vn (has, e.g., copied vnp4 .cshrc # so, e.g. HOMEMWC -> /d/vnfe4/home/matt instead of HOMEMWC -> /d/vnfe3/home/matto) # As roland@vnfe1 mkdir mwc-work cd !$ tar zxf ~matt/examples.tar.gz cd examples make all # So IS there a problem with vn20? Will execute 'ssh vn20 cd /d/vnfe1/home/roland/mwc-work/examples/cpi-mpi-pgi; time mpirun -np 4 -machinefile mfile cpi ' Will use the following machine file vn37 vn38 vn39 p0_686: p4_error: net_create_slave: execlp: -1 p0_686: p4_error: interrupt SIGINT: 2 bm_list_687: p4_error: interrupt SIGINT: 2 0.161u 0.281s 0:00.52 84.6% 0+0k 0+0io 1pf+0w # So now cd mwc-work/examples make all # works # ... Yes, send Maggie a thank you message, take vn20 out of the pack and have Pal and Ben reinstall with # vn25 # For the time being, run Lock in the bg as root # As root@vn20 Lock & # ... or perhaps it's software. # ... vn52 doesn't have correct /etc/ld.so.conf for Intel/PGI compilers. # As matt@bh0 vpi # Disabled reboot # ...
ld.so.conf had not been "installed" in /d/vnfe4/home/matt/system/vn-10.1/image/master/etc/ # As root@vn52 ssh matt@bh0 post-install | csh # Same deal with vn20, vn6, others vnCommand 'wc /etc/ld.so.conf' 7 8 154 /etc/ld.so.conf # As root@{vn20,vn6} ssh matt@bh0 post-install | csh # vn20 crashed during the post-installation ######################################################################## Fri Jan 28 15:38:01 PST 2005 ######################################################################## # Post installation on vn25 # As root@vn25 ssh matt@bh0 post-install | csh reboot ping vn25 # As root@vn25 jj ntp ntptimeset # As matt@vn25 # remove vn25 from known_host # OK # vn20 still has problems (connection confused in MPI run, although # did get one 64 proc run of cpi through), swapped DIMMS # Vnrun # Running as root, so have to kill ~/.ssh/known_hosts on ALL machines vnCommand '/bin/rm /root/.ssh/known_hosts' vnCommand 'cd /root; scp vnfe1:/root/.rhosts .; wc .rhosts' vn52 Permission denied. # ??? vnCommand 'tail -20 /var/log/messages' > /tmp/log # Indications that there may be a problem with rsh (MPI error message, # jobs don't complete) # /usr/bin/rsh.netkit /usr/bin/rsh identical rsh-0.17-13mdk ######################################################################## Fri Jan 28 17:22:49 PST 2005 ######################################################################## # vn16 rebooted, logs show Jan 28 14:09:51 vn16 kernel: APIC error on CPU0: 00(08) ######################################################################## Fri Jan 28 18:00:36 PST 2005 ######################################################################## # Timeout in waiting for processes to exit. This may be due to a defective # rsh program (Some versions of Kerberos rsh have been observed to have this # problem). # This is not a problem with P4 or mpich but a problem with the operating # environment. For many applications, this problem will only slow down # process termination. 
# What does this mean? # A: If anything causes the rundown in MPI_Finalize to take more than about 5 # minutes, it becomes suspicious of the rsh implementation. The rsh used with some Kerberos installations assumed that sizeof(FD_SET) == sizeof(int). This meant that the rsh program assumed that the largest FD value was 31. When a program uses fork to create processes that launch rsh, while maintaining the stdin, stdout, and stderr to the forked process, this assumption is no longer true, since the FD that rsh creates for the socket may be greater than 31 if there are enough processes running. When using such a broken implementation of rsh, the symptom is that jobs never terminate because the rsh jobs are waiting (with select) for the socket to close. # TODO # The ch_p4mpd device eliminates this problem. ######################################################################## Sun Jan 30 13:37:44 PST 2005 ######################################################################## # Transmutation of vn20 and vn64, so that, with luck, vn1 - vn63 # inclusive are happy # # SEE README.CRASH (CRASH_???) ######################################################################## Tue Feb 8 01:12:21 PST 2005 ######################################################################## # Scott H. points out that # # 1) vn52:/scratch # # doesn't have /tmp permissions # # 2) (inadvertently) that on-line doc re mkvnrhosts should be updated # so that users know to set proper permissions on ~/.rhosts file? 
# vnallbgCommand 'mkdir -p /scratch; chmod a+w /scratch; chmod +t /scratch; ls -ltd /scratch' > /tmp/scratch sleep 60 ######################################################################## Tue Feb 8 10:46:45 PST 2005 ######################################################################## # vn52:/scratch STILL has the wrong permissions per Scott's next msg vnallbgCommand 'mkdir -p /scratch; chmod 777 /scratch; chmod +t /scratch; hostname; ls -ltd /scratch' > /tmp/scratch sleep 30 vi /tmp/scratch # umask IS getting set to 2, per Scott's message, must be in start up # *directory* (sigh) # csh.login! (rookie mistake) # ... which we will simply eliminate. # Some nodes still have /home, which is a problem vn6 vn25 vn52 vn64 # Quick fix is to set umask in .cshrc, but better fix is to # use stub csh.login ######################################################################## Sat Feb 12 04:49:34 PST 2005 ######################################################################## # vn64 down again # See README.CRASH (CRASH) # Suggest that we run with 1-63, and replace vn64 with bh2 # As matt@bh0 'cat /proc/cpuinfo | grep -i hz' >>> Executing as root@bhtest1.physics.ubc.ca ssh: connect to host bhtest1.physics.ubc.ca port 22: No route to host >>> Executing as root@bh0.physics.ubc.ca cpu MHz : 1666.740 cpu MHz : 1666.740 >>> Executing as root@bh1.physics.ubc.ca cpu MHz : 451.030 cpu MHz : 451.030 >>> Executing as root@bh2.physics.ubc.ca cpu MHz : 851.951 cpu MHz : 851.951 >>> Executing as root@bh3.physics.ubc.ca cpu MHz : 451.027 cpu MHz : 451.027 >>> Executing as root@bh4.physics.ubc.ca cpu MHz : 451.027 cpu MHz : 451.027 >>> Executing as root@bh5.physics.ubc.ca cpu MHz : 451.031 cpu MHz : 451.031 >>> Executing as root@bh6.physics.ubc.ca cpu MHz : 451.025 cpu MHz : 451.025 >>> Executing as root@bh7.physics.ubc.ca cpu MHz : 1004.552 cpu MHz : 1004.552 >>> Executing as root@bh8.physics.ubc.ca cpu MHz : 1004.542 cpu MHz : 1004.542 >>> Executing as root@bh9.physics.ubc.ca model 
name : Intel(R) Pentium(R) 4 CPU 3.00GHz cpu MHz : 2992.581 # Hi Pal and Ben: vn64 died again, with an exception in an I/O request. It's probably either the mobo or the i/o subsystem, but in any case, given the relative lack of use of the old cluster, I don't want to fuck with it any more at the current time. So please do the following ASAP, after giving Bruno 24 hours. As usual, if you can't do it ASAP, please let me know, and give me your availability over the next week. 0a) The top-level goal here is to swap bh2 and vn64 0b) The top-level constraints and the measures of how good a job you are doing are i) Not to fuck up / lose any data on bh2 (Bruno's machine) ii) To get this done as quickly as possible. If you know what you are doing and spend a little time getting organized it could take as long as 2 hours. If it starts taking 5, you're not focusing on what you are doing. 1) Put a message up on bh2 when you start archiving bh2 (presumably to bh9 ... you may need to clean up on bh9, if that won't work, then to vnfe4). I'm CC-ing Bruno on this so he should be apprised that this has happened. Copy /home /home2 2) Convert vn64 to bh2. This should be trivial. I've already pulled it out of the cluster; just boot it up single-user, change the IP and hostname, boot it up a couple more times for good measure, and PAY SPECIAL ATTENTION TO /etc/hosts. Sometimes an alias for an old hostname will remain in the localhost address; ensure that the 127.0.0.1 address will de-resolve localhost AND NOTHING ELSE.
3) Copy ############################################################ Tue Feb 22 15:37:10 PST 2005 ############################################################ # New GROUP accounts # # p55505g1 # p55505g2 # p55505g3 # p55505g4 # p55505g5 # From lnx1 !!ssh root@lnx1 "grep '^p55505g' /etc/passwd" p55505g2:x:30002:100:PHYS 555B Spring 2005 Group 2:/d/lnx1/home/p55505g2:/bin/tcsh p55505g3:x:30003:100:PHYS 555B Spring 2005 Group 3:/d/lnx2/home/p55505g3:/bin/tcsh p55505g4:x:30004:100:PHYS 555B Spring 2005 Group 4:/d/lnx3/home/p55505g4:/bin/tcsh p55505g1:x:30001:100:PHYS 555B Spring 2005 Group 1:/d/lnx1/home/p55505g1:/bin/tcsh p55505g5:x:30005:100:PHYS 555B Spring 2005 Group 5:/d/lnx3/home/p55505g5:/bin/tcsh !!ssh root@lnx1 "grep '^p55505g' /etc/shadow" p55505g1:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g2:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g3:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g4:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g5:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: vi README.USERS 30001 p55505g1 # PHYS 555B Spring 2005 Group 1 (TBA) 30002 p55505g2 # PHYS 555B Spring 2005 Group 2 (TBA) 30003 p55505g3 # PHYS 555B Spring 2005 Group 3 (TBA) 30004 p55505g4 # PHYS 555B Spring 2005 Group 4 (TBA) 30005 p55505g5 # PHYS 555B Spring 2005 Group 5 (Choptuik/Kelleher) nu setenv NU p55505 df | grep vnfe1 vnfe1:/home 10958176 6369664 4588512 59% /d/vnfe1/home vnfe1:/home2 17496688 14139408 3357280 81% /d/vnfe1/home2 vnfe1:/home3 17496688 8672840 8823848 50% /d/vnfe2/home cat<${NU} p55505g1:x:30001:20000:PHYS 555B Spring 2005 Group 1:/d/vnfe1/home2/p55505g1:/bin/tcsh p55505g2:x:30002:20000:PHYS 555B Spring 2005 Group 2:/d/vnfe1/home2/p55505g2:/bin/tcsh p55505g3:x:30003:20000:PHYS 555B Spring 2005 Group 3:/d/vnfe1/home2/p55505g3:/bin/tcsh p55505g4:x:30004:20000:PHYS 555B Spring 2005 Group 4:/d/vnfe1/home2/p55505g4:/bin/tcsh p55505g5:x:30005:20000:PHYS 555B Spring 2005 Group 
5:/d/vnfe1/home/p55505g5:/bin/tcsh END vnNewUsers ${NU} # Fixed up phys410@vnfe1:~/{.cshrc,.profile} setenv NUSERS "p55505g1 p55505g2 p55505g3 p55505g4 p55505g5" foreach u ($NUSERS) ssh ${u}@vnfe1 '/bin/cp ~phys410/.cshrc .; /bin/cp ~phys410/.profile .' ssh ${u}@vnfe1 '/bin/cp ~phys410/.aliases .; /bin/cp ~phys410/.aliases.bash .' ssh ${u}@vnfe3 'ls -alt .cshrc .aliases .profile .aliases.bash' ssh ${u}@vn35 'ls -alt .cshrc .aliases .profile .aliases.bash' end #Verified all logins, change passwd to mine !!ssh root@bh0 grep -i matt /etc/shadow matt:$1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31:10897:0:99999:7::: '$1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31' -> 'the usual these days' etc sola; vs # $1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31 vnDistEtc shadow foreach NU ($NUSERS) ssh root@vnfe1 "cd ~${NU}; echo ${NU}@lnx1.ubc.ca > .forward; chown ${NU}.phys410 .forward; ls -al; finger ${NU}" end foreach NU ($NUSERS) foreach m (vnfe1 vnfe3 vn62) ssh ${NU}@${m} 'hostname; echo $USER; date' end end d p55505g1 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g2 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g3 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g4 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g5 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh # Verified foreach NU ($NUSERS) ssh $NU@vnfe1 date end # OK ... 
can get going with exercises and updates # p55505g1 # p55505g2 # p55505g3 # p55505g4 # p55505g5 # From lnx1 !!ssh root@lnx1 "grep '^p55505g' /etc/passwd" p55505g2:x:30002:100:PHYS 555B Spring 2005 Group 2:/d/lnx1/home/p55505g2:/bin/tcsh p55505g3:x:30003:100:PHYS 555B Spring 2005 Group 3:/d/lnx2/home/p55505g3:/bin/tcsh p55505g4:x:30004:100:PHYS 555B Spring 2005 Group 4:/d/lnx3/home/p55505g4:/bin/tcsh p55505g1:x:30001:100:PHYS 555B Spring 2005 Group 1:/d/lnx1/home/p55505g1:/bin/tcsh p55505g5:x:30005:100:PHYS 555B Spring 2005 Group 5:/d/lnx3/home/p55505g5:/bin/tcsh !!ssh root@lnx1 "grep '^p55505g' /etc/shadow" p55505g1:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g2:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g3:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g4:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: p55505g5:$1$3MBkHd2y$nsZShgtmakO25VzgzPf5P.:12775:-1:99999:-1::: vi README.USERS 30001 p55505g1 # PHYS 555B Spring 2005 Group 1 (TBA) 30002 p55505g2 # PHYS 555B Spring 2005 Group 2 (TBA) 30003 p55505g3 # PHYS 555B Spring 2005 Group 3 (TBA) 30004 p55505g4 # PHYS 555B Spring 2005 Group 4 (TBA) 30005 p55505g5 # PHYS 555B Spring 2005 Group 5 (Choptuik/Kelleher) nu setenv NU p55505 df | grep vnfe1 vnfe1:/home 10958176 6369664 4588512 59% /d/vnfe1/home vnfe1:/home2 17496688 14139408 3357280 81% /d/vnfe1/home2 vnfe1:/home3 17496688 8672840 8823848 50% /d/vnfe2/home cat<${NU} p55505g1:x:30001:20000:PHYS 555B Spring 2005 Group 1:/d/vnfe1/home2/p55505g1:/bin/tcsh p55505g2:x:30002:20000:PHYS 555B Spring 2005 Group 2:/d/vnfe1/home2/p55505g2:/bin/tcsh p55505g3:x:30003:20000:PHYS 555B Spring 2005 Group 3:/d/vnfe1/home2/p55505g3:/bin/tcsh p55505g4:x:30004:20000:PHYS 555B Spring 2005 Group 4:/d/vnfe1/home2/p55505g4:/bin/tcsh p55505g5:x:30005:20000:PHYS 555B Spring 2005 Group 5:/d/vnfe1/home/p55505g5:/bin/tcsh END vnNewUsers ${NU} # Fixed up phys410@vnfe1:~/{.cshrc,.profile} setenv NUSERS "p55505g1 p55505g2 p55505g3 
p55505g4 p55505g5" foreach u ($NUSERS) ssh ${u}@vnfe1 '/bin/cp ~phys410/.cshrc .; /bin/cp ~phys410/.profile .' ssh ${u}@vnfe1 '/bin/cp ~phys410/.aliases .; /bin/cp ~phys410/.aliases.bash .' ssh ${u}@vnfe3 'ls -alt .cshrc .aliases .profile .aliases.bash' ssh ${u}@vn35 'ls -alt .cshrc .aliases .profile .aliases.bash' end #Verified all logins, change passwd to mine !!ssh root@bh0 grep -i matt /etc/shadow matt:$1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31:10897:0:99999:7::: '$1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31' -> 'the usual these days' etc sola; vs # $1$s5DjbFBh$.0Yh6z8MYwqxxVqGWwlM31 vnDistEtc shadow foreach NU ($NUSERS) ssh root@vnfe1 "cd ~${NU}; echo ${NU}@lnx1.ubc.ca > .forward; chown ${NU}.phys410 .forward; ls -al; finger ${NU}" end foreach NU ($NUSERS) foreach m (vnfe1 vnfe3 vn62) ssh ${NU}@${m} 'hostname; echo $USER; date' end end d p55505g1 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g2 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g3 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g4 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh d p55505g5 mkdir -p .ssh; cd .ssh; touch authorized_keys; ssh matt@bh0.physics.ubc.ca cat /d/bh0/home/matt/.ssh/id_rsa.pub >> authorized_keys; chmod -R og-rwx ~/.ssh # Verified foreach NU ($NUSERS) ssh $NU@vnfe1 date end # OK ... can get going with exercises and updates # DUPLICATED ACCOUNTS ON vnfe4 # OK # As root@vnfe1 ls -ltd /d/*/home*/p55505g? 
drwxr-xr-x 3 p55505g5 phys410 4096 Feb 22 16:58 /d/vnfe4/home/p55505g5/ drwxr-xr-x 3 p55505g4 phys410 4096 Feb 22 16:58 /d/vnfe4/home/p55505g4/ drwxr-xr-x 3 p55505g3 phys410 4096 Feb 22 16:58 /d/vnfe4/home/p55505g3/ drwxr-xr-x 3 p55505g2 phys410 4096 Feb 22 16:57 /d/vnfe4/home/p55505g2/ drwxr-xr-x 3 p55505g1 phys410 4096 Feb 22 16:57 /d/vnfe4/home/p55505g1/ drwxr-xr-x 4 p55505g5 phys410 4096 Feb 22 16:50 /d/vnfe1/home/p55505g5/ drwxr-xr-x 4 p55505g4 phys410 4096 Feb 22 16:50 /d/vnfe1/home2/p55505g4/ drwxr-xr-x 4 p55505g3 phys410 4096 Feb 22 16:50 /d/vnfe1/home2/p55505g3/ drwxr-xr-x 4 p55505g2 phys410 4096 Feb 22 16:50 /d/vnfe1/home2/p55505g2/ drwxr-xr-x 4 p55505g1 phys410 4096 Feb 22 16:50 /d/vnfe1/home2/p55505g1/ ############################################################ Thu Mar 3 09:04:40 PST 2005 ############################################################ # vnDistEtc doesn't propagate /etc/motd to vn25 (vn25 was # unstable), fixed # As matt@vnfe1 cds cp vnDistEtc vnDistEtc.2005.03.03 vi vnDistEtc vnDistEtc motd # OK ############################################################ Sat Mar 19 12:24:22 PST 2005 ############################################################ # Nasty hacking of available nodes in # matt@vnfe1:~/scripts/mp_func killed vnrun # However, with vn64 and vn6 down and vn63 pending ... # As idle@vnfe1 ... cd examples/cpi-mpi-intel make vnrun -n 62 cpi # Timeout in waiting for processes to exit, 4 left. This may be due to a defective rsh program (Some versions of Kerberos rsh have been observed to have this problem). This is not a problem with P4 or MPICH but a problem with the operating environment. For many applications, this problem will only slow down process termination. 
pi is approximately 3.1415926539002346, Error is 0.0000000003104415 wall clock time = 0.063194 3.590u 4.671s 1:39.23 8.3% 0+0k 0+0io 0pf+0w # Hacked on vnkillall, should add [-b] to run in background to entire # suite (date; vnrun -n 60 cpi; date) | tee /tmp/log # on the other hand Sat Mar 19 12:34:42 PST 2005 Will execute 'ssh -x vn11 cd /d/vnfe2/home/idle/examples/cpi-mpi-intel; time mpirun -np 60 -machinefile mfile cpi ' Will use the following machine file vn12 vn13 vn17 vn26 vn27 vn38 vn40 . . . vn59 vn18 vn20 vn56 vn53 vn55 pi is approximately 3.1415926539002341, Error is 0.0000000003104410 wall clock time = 0.363065 3.516u 4.367s 0:46.26 17.0% 0+0k 0+0io 10pf+0w Sat Mar 19 12:35:35 PST 2005 # ... so finishes in a little over a minute ... woo, hoo # now hacking on matt@vnfe1:~/scripts/Export trying to get a cshell # pattern to a remote shell vnallbgCommand "cd /d/vnfe1/home/matt/scripts; /bin/cp -r {vnkillall,vnrun,vnN,vnCommand,Konsole,mkvnrhosts,Rtop,Rtop-node,nth,lino,number,lines,vnTop} /usr/local/bin" # ... which wasn't THAT painful. # OK, seems to be basically alive # ... and now have just noticed that vn31 is down --- they're dropping like flies!! # ... put unavailable list up, and carry on (start reserving vn62) # As matt@vnfe1 cdex; cd cpi-mpi-intel; make clean; make; vnrun -n 40 cpi a ptest 'setenv NP \!*; cdex; cd cpi-mpi-intel; make clean; make; echo "+++ Starting run on $NP processors"; setenv D0 `date`; date; sleep 2; vnrun -n $NP cpi; echo $D0; date' # ... and I better get out of here! ############################################################ Tue Mar 29 21:14:54 PST 2005 ############################################################ # vn63 has drained and have swapped with vn6, vn31 apparently # needs power supply # "new" vn63 and vn64 have been pulled from cluster # 1. Reset rwho /var/spool/rwho vnCommand 'ls -lR /var/spool/rwho' > /tmp/rwho vnallCommand '/bin/rm -f /var/spool/rwho/*' # 2.
Backup and remove known_hosts # As root@vnfe1 LS /d/*/home*/*/.ssh/known_hosts | dub | pre cp -a > /tmp/so vi /tmp/so # With 216 users need a script bu_known_hosts /bin/rm -f /d/*/home*/*/.ssh/known_hosts # 3. Update motd and TODO web page motd vi motd.2005.03.29 cp !$ motd vnDistEtc motd # 4. Check on status vnCommand 'echo "--- up"; up; echo "-- down"; down' | tee /tmp/updown # OK # vn31 apparently needs new power supply? # Perhaps not? Has gone into shop. Sun Apr 10 13:38:09 PDT 2005 # Upon pinging Pal and Ben, found out that vn31 had come back from the # shop, that nothing had been diagnosed, and that it had been reinserted # into the cluster # # thanks for letting me know, guys # As root@vn31 uptime 06:38:22 up 3 days, 22:59, 1 user, load average: 1.00, 1.00, 1.00 # So at least someone (pineda) noticed that it was up jj ntp date Sun Apr 10 06:38:54 PDT 2005 vnSetdate; service ntpd restart; ntptimeset vnResetdate ############################################################ Tue Apr 5 15:56:35 PDT 2005 ############################################################ # Gerald Lim reports that vnfe1:/home2 has been filled. # Looks like aaron is the culprit ... # As root@vnfe1 cd /home2 usage - cd ~aaron cd ~aaron/leapfrog # Unfortunately, for the time being will have to kill aaron's jobs # and relocate his files # As root@vnfe1 Kill aaron # As root@vnfe4 cd /home/aaron/leapfrog mv -i /d/vnfe1/home2/aaron/leapfrog/wts-runs . # As root@vnfe1 su aaron cd ~/leapfrog ln -s /d/vnfe4/home/aaron/leapfrog/wts-runs . ############################################################ Wed Apr 6 16:16:24 PDT 2005 ############################################################ # Maggie reports problem on vnfe3 From maggie@math.ubc.ca Wed Apr 6 16:16:39 2005 To: choptuik@physics.ubc.ca Subject: vnfe3 problem Dear Matt, vnfe3 seems has problem.
I got the following error: vn16.physics.ubc.ca: Connection refused vn16.physics.ubc.ca: Connection refused vn17.physics.ubc.ca: Connection refused vn17.physics.ubc.ca: Connection refused vn18.physics.ubc.ca: Connection refused vn18.physics.ubc.ca: Connection refused vn1.physics.ubc.ca: Connection refused vn1.physics.ubc.ca: Connection refused vn20.physics.ubc.ca: Connection refused # As matt@vnfe1 ptest 30 Process 17 on vn12.physics.ubc.ca: n_intervals=-1 Process 20 on vn16.physics.ubc.ca: n_intervals=-1 Process 23 on vn20.physics.ubc.ca: n_intervals=-1 Process 26 on vn27.physics.ubc.ca: n_intervals=-1 Process 28 on vn29.physics.ubc.ca: n_intervals=-1 Process 24 on vn24.physics.ubc.ca: n_intervals=-1 pi is approximately 3.1415926539002337, Error is 0.0000000003104406 wall clock time = 0.147358 1.787u 2.011s 0:32.24 11.7% 0+0k 0+0io 13pf+0w Wed Apr 6 16:15:59 PDT 2005 Wed Apr 6 16:16:34 PDT 2005 # OK ptest 48 ############################################################ Fri Apr 8 06:26:47 PDT 2005 ############################################################ # Maggie still having problems, getting connection refused # messages that I *can* reproduce using her account. 
# # Affected nodes # vn29 # vn43 # Wrote vnIPtoName, needs to be exported # As matt@vnfe1 make export # Wrote /d/vnfe1/home/matt/examples/cpi-mpi-intel/Scan # to check master -> node MPI connectivity # Looks fine now, at least as Matt, try as Maggie # OK ############################################################ Fri Apr 8 12:57:10 PDT 2005 ############################################################ # TODO: Need f90 version of mpi libraries, examples ############################################################ Sun Apr 10 06:18:22 PDT 2005 ############################################################ # vn25 has load average of merely 159, not surprisingly, # sshd is refusing connections # # See README.CRASH (CRASH_179) ############################################################ Sun Apr 10 07:28:04 PDT 2005 ############################################################ # vn6 down BLUNDER! In my de-cabling zeal of vn6 (to which # the KVM cables were attached) I de-power cabled it. # Sorry, whomever # See README.CRASH (CRASH_180) Sun Apr 10 09:29:58 PDT 2005 # Have coded 'whowason' which seems to be working OK # alistair on vn6 # No one on vn25? 
whowason vn25 2000 2005:04:08:1030.00:103: vn25 24907 maggie 18 0 497m 193m 4004 D 3.9 38.5 0:06.58 cfdildm 2005:04:10:0755.00:126: vn25 12304 idle 18 0 1992 864 1788 R 3.9 0.2 0:00.02 top # Send message to alistair ############################################################ Sun Apr 10 21:34:11 PDT 2005 ############################################################ # Reinserting vn31 in cluster # As matt@vnfe1 viw vnN vnN | wc 62 62 921 # MPI test cdex Scan vn62 # OK ############################################################ Fri Apr 15 07:35:41 PDT 2005 ############################################################ # vn25 down vn25 down 1+09:53 # Ben rebooted and it came back OK (I'm in Banff @ BIRS) ############################################################ Mon Apr 18 08:16:38 PDT 2005 ############################################################ # vn25 down # Message to Ben and Pal Mon Apr 18 19:02:55 PDT 2005 # Amazingly fast, only took 11 hours! Not! ############################################################ Tue Apr 19 08:21:32 PDT 2005 ############################################################ # Guess what! vn25 down down vn25 down 4:17 # TODO # Message to Ben and Pal, leave it, will look at it when I get back # Ben revived by swapping the P/S with one of vn63/vn64, looks like # vn16 may have accidentally been rebooted at the same time? 
# As matt@vnfe1 viw vnN ############################################################ Tue Apr 19 16:07:33 PDT 2005 ############################################################ # Reconfigure/build mpi with Fortran 90 cds Arc vnMPImakeINTEL setvars vi vnMPImakeINTEL setvars # As root@vnfe1 vnMPImakeINTEL cdi; cd mpich-1.2.6 make install # Coded /home/matt/examples/f90pi-mpi-intel example # OK ############################################################ Thu Apr 21 06:38:34 PDT 2005 ############################################################ # Move aadutton, akeshet, ehonda to vnfe4 # As root@vnfe4 cd /home foreach u (aadutton akeshet ehonda) mv $u $u.O end mv /d/vnfe1/home/aadutton /home & mv /d/vnfe1/home2/akeshet /home & mv /d/vnfe1/home/ehonda /home & # As matt@vnfe1 etc scp root@vnfe1:/etc/passwd passwd.vnfe1 diff !$ passwd vi passwd # Make mods vnDistEtc passwd # As root@vnfe1 ln -s /d/vnfe4/home/aadutton /d/vnfe1/home/aadutton ln -s /d/vnfe4/home/akeshet /d/vnfe1/home2/akeshet ln -s /d/vnfe4/home/ehonda /d/vnfe1/home/ehonda ############################################################ Sun Apr 24 12:26:23 PDT 2005 ############################################################ # Dave having some problems with mpi on vnfe1 vn1 vn31 vn49 vn11 # Apparently some problem with vn22? 
vn43 # Have coded ~/scripts/nodes # which aids in keeping node list updated (pbsnodes facsimile would # be better) ############################################################ Mon Apr 25 12:14:01 PDT 2005 ############################################################ # vn43 down see README.CRASH (CRASH_185) ############################################################ Fri Apr 29 10:02:59 PDT 2005 ############################################################ # Updating postfix main.cf # As matt@vnfe1 cd /home/matt/system/vn/image/master/etc/postfix scp root@vnfe1:/etc/postfix/main.cf main.cf.orig scp root@bh0:/etc/postfix/main.cf main.cf.bh0 vnCommand 'chkconfig --list | grep postfix' >>> Executing as root@142.103.237.1 postfix 0:off 1:off 2:on 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.2 postfix 0:off 1:off 2:on 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.3 postfix 0:off 1:off 2:on 3:on 4:on 5:on 6:off >>> Executing as root@142.103.237.4 postfix 0:off 1:off 2:on 3:on 4:on 5:on 6:off cp main.cf.bh0 main.cf vnallCommand 'cd /etc/postfix; Arc main.cf; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/postfix/main.cf .; service postfix restart' vnallCommand 'grep relayhost /etc/postfix/main.cf' ############################################################ Fri Apr 29 14:10:49 PDT 2005 ############################################################ # Mailman running on cluster? 
vnallCommand 'chkconfig --list | grep mailman' vnallbgCommand 'chkconfig mailman off; service mailman stop' ############################################################ Wed May 11 09:10:39 PDT 2005 ############################################################ # Mounting /d/bh9 so that /Public can be seen from cluster # As matt@vnfe1 etc make import vi fstab.vnfe[13] fstab.node bh9:/home /d/bh9/home nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 # Coded 'fstab' target in Makefile FSTAB_FILES= fstab.vnfe1 fstab.vnfe3 fstab.node FSTAB_NODE_FULL_PATH = /d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab: $(FSTAB_FILES) scp root@vnfe1:/etc/fstab .fstab.vnfe1 scp fstab.vnfe1 root@vnfe1:/etc/fstab scp root@vnfe3:/etc/fstab .fstab.vnfe3 scp fstab.vnfe3 root@vnfe3:/etc/fstab vnNbgCommand "cd /etc; Arc fstab; /bin/cp $(FSTAB_NODE_FULL_PATH) fstab" vnallbgCommand 'mkdir -p /d/bh9/home' vnallbgCommand 'mount -a' # ... : Permission denied # As root@bh9 vi /etc/exports foreach i (`iota 64`) echo "vn${i}.physics.ubc.ca \\" | tee -a /tmp/so end vi /etc/exports :r /tmp/so exportfs -av service xinetd restart # As matt@vnfe1 vnCommand 'mount -a ; sleep 2; df' # OK vnallbgCommand 'ln -s /d/bh9/home/laplace/usr2/Public /Public' vnCommand 'cd /Public; pwd; ls' # Making README.CRASH public 5+(21/52.); years = 5.40 * 64; = 346 / 185.; = 1.87 yrs/node-crash; = Mean time between node crashes 1.9 years! ############################################################ Wed May 11 11:10:08 PDT 2005 ############################################################ # Aaron reporting problems working through the MPI examples # per the web pages # As root@vnfe1 cd ~aaron/.ssh more authorized_keys # Am in there ... # As aaron@vnfe1 vi ~/.cshrc # Munged path set path=(. 
~/bin $HOMEMWC/scripts $path) # OK ############################################################ Thu May 19 13:38:44 PDT 2005 ############################################################ 1829 tli # New account for Tingwen Li (UBC MECH GS Salcudean) # :n README.USERS # /tli 1829 tli # UBC MECH GS (Salcudean) nu setenv NU tli cat<${NU} ${NU}:x:1829:1800:Tingwen Li:/d/vnfe3/home/${NU}:/bin/bash END vnNewUsers ${NU} As root@vnfe1 cp ~phys410/{.profile,.aliases.bash} ~tli; chown -R tli.salcudea ~tli ssh ${NU}@vnfe1 ssh ${NU}@vn35 date # OK etc sola; vs # $1$PoiwwR1S$Be8ZVwiFOKNohHQkBKqFt. vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo litw@interchange.ubc.ca > .forward; chown ${NU}.salcudea .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Sat May 28 16:46:30 PDT 2005 ############################################################ # New account for Nick Fameli # As choptuik@physics.ubc.ca finger fameli Login name: fameli In real life: Nicola Fameli Directory: /home/fameli Shell: /bin/tcsh Last login Thu May 26 18:09 on pts/37 from v139-157.physic No unread mail No Plan. sudo pwentry :x:493:307:Nicola Fameli:/home/fameli:/bin/tcsh fameli:Xbkafj3pMADXw:11515::::::-1 grep 493 /etc/passwd # :n README.USERS # /tli 493 nico # nu setenv NU nico cat<${NU} ${NU}:x:493:9000:Nicola Fameli:/d/vnfe1/home/${NU}:/bin/tcsh END vnNewUsers ${NU} As root@vnfe1 cp ~phys410/{.cshrc,.aliases} ~${NU}; chown -R nico.other ~${NU} ssh ${NU}@vnfe1 ssh ${NU}@vn35 date # OK etc sola; vs # Xbkafj3pMADXw vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo fameli@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ############################################################ Thu Jun 2 09:53:57 PDT 2005 ############################################################ # Kevin reports problem with cvs@vnfe1 # Problem: vnfe1:/home2/cvs is owned by matt! 
# As root@vnfe1 chown -R cvs.choptuik /home2/cvs # OK ############################################################ Thu Jun 2 17:17:21 PDT 2005 ############################################################ # Aaron reports problem with # vn10:/usr/local/intel/bin/mpif90 # which I verify # Temporarily hack vnCommand to execute as matt vnCommand 'hostname; cd /d/vnfe1/home/matt/examples/f90pi-mpi-intel; make clean; make' | tee /tmp/vnCommand-intel-mpif90 viw vnMPImakeINTEL # ... and sure enough, in ~matt/scripts/setvars, F90 had been set to 'f90' instead # of 'ifort' vi /d/vnfe1/home/matt/scripts/setvars setINTEL_P4() { CC='icc'; export CC; test "X$1" = X && echo "CC = $CC"; CFLAGS='-O3 -tpp7 -axW'; export CFLAGS; test "X$1" = X && echo "CFLAGS = $CFLAGS"; . . . F90='ifort'; export F90; test "X$1" = X && echo "F90 = $F90" F90FLAGS='-O3 -tpp7 -axW -cm -Vaxlib'; export F90FLAGS; test "X$1" = X && echo "F90FLAGS = $F90FLAGS" } # Restore vnCommand matt->root translation viw vnCommand vnNbgCommand ############################################################ Thu Jun 2 19:52:02 PDT 2005 ############################################################ # Maggie reports problem compiling (?) From maggie@math.ubc.ca Thu Jun 2 19:52:33 2005 Date: Wed, 01 Jun 2005 11:58:57 -0700 From: maggie To: Matthew W. Choptuik Subject: vnfe3 problem Hi Matt, The same program run on vnfe4, but not on vnfe3, so maybe there's a problem with vnfe3. The error message I got: p7_7565: p4_error: interrupt SIGFPE: 8 Broken pipe Broken pipe Broken pipe Broken pipe Thanks. 
-- regards, ************************************** mei(maggie) wang, PhD candidate # As matt@vnfe1 cdex Scan # OK # Ask Maggie if it's still a problem ############################################################ Mon Jun 6 10:41:50 PDT 2005 ############################################################ # In response to request to reduce usage on vnfe3, Maggie # pleads for more space, will make home on /home2, but also # query her re nature of her usage, and processor configuration # that is accessing it. # As root@vnfe3 cd /home2 mkdir maggie chown -R maggie.bushe maggie ############################################################ Thu Jun 23 16:42:12 PDT 2005 ############################################################ # Reconfiguring first of Steve P's machines as vn nodes # # First need to back up, bring up without network cable plugged # in reconfigure as # # bhtest1.physics.ubc.ca # 142.103.234.190 # As root@bhtest1 # Backup to vnfe4:/home mkdir -p /d/vnfe4/home mount -a mkdir -p /d/vnfe4/home/mariah cd / cp -a backup etc home misc root var /d/vnfe4/home/mariah mkdir -p /d/vnfe4/home/mariah/usr cd /usr cp -a local /d/vnfe4/home/mariah/usr/local # Latency with vnfe4 is atrocious, probably the freaking # backups ... nope, Pal is apparently exonerated this time, more likely # some multiprocessor "banging" Fri Jun 24 09:02:03 PDT 2005 # Copy finished last evening. Rudimentary check ... 
# As root@bhtest1 cd / du -hs backup etc home misc root var 17G backup 58M etc 32G home 4.0K misc 3.8M root 64M var # As root@bh6 cd /home/mariah du -hs backup etc home misc root var 17G backup 58M etc 32G home 4.0K misc 3.8M root 64M var # Full on # As root@bh6 cd / test -f /tmp/diff && /bin/rm -f /tmp/diff foreach d (backup etc home misc root var) echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++" echo "+++ diff'ing --- $d ---" echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++" diff -r $d /d/bh6/home/mariah/$d | tee -a /tmp/diff end ############################################################ Sat Jun 25 06:01:31 PDT 2005 ############################################################ # Pal has reported the following # but installation *hadn't* completed, and now boot up is # very slow # inclined to reinstall # Indeed, should install per bh machines with (of course) # # matt@bh0:~/scripts/secondary-vn # NOTE: HAVE DISABLED HYPERTHREADING IN BIOS NFS server bh9.physics.ubc.ca Mandriva /home/2005/i586 # Have now installed Mandriva, have reenabled hyperthreading # and is still abysmally slow # steve, of course, would be grinning right now, given my crowing # about how easy this would be! Sat Jun 25 17:01:43 PDT 2005 # Desperation blvd ... try noapic boot/kernel option Arc /etc/lilo.conf vi /etc/lilo.conf append=" ... noapic" # Initialization of USB controllers taking forever # Doesn't look like it's had any effect, better to check # BIOS with a more functioning machine # Perhaps is a problem with the KVM switch?? Should try direct connection, monitor?? # Should go ahead with secondary install, but needs to be rationalized (carefully!) 
# with ftp://laplace.../vns...post-install..o # At a minimum it will reduce the number of daemons that need to be started :-) ############################################################ Wed Jun 29 10:15:53 PDT 2005 ############################################################ # New account for David Garfinkle vi README.NEWUSERS 493 nico # nu setenv NU garfinkl cat<${NU} garfinkl:x:2000:600:David Garfinkle:/d/vnfe1/home/garfinkl:/bin/bash END vnNewUsers ${NU} As root@vnfe1 cp ~phys410/{.cshrc,.aliases} ~${NU}; chown -R garfinkl.choptuik ~${NU} ssh ${NU}@vnfe1 ssh ${NU}@vn35 date # OK etc sola; vs # $1$fl/6Fy0H$Ba9FQrI11w5ES8q1dLTAU/ vnDistEtc shadow # TODO: # ssh root@vnfe1 "cd ~${NU}; echo fameli@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" DUPLICATED ACCOUNT ON vnfe4 ?? ############################################################ Wed Jul 6 12:32:23 PDT 2005 ############################################################ # Trying to get MPI back in shape on old cluster # As root@vnfe[13] slocate libmpi > /tmp/libmpi !!ssh root@vnfe1 cat /tmp/libmpi /home/matt/autoconf/lam-6.3/man/man3/libmpi.3 /home/matt/debug/maggie/ildm/libmpich.a /home/murray/lib/libmpi.la /home/murray/lib/libmpi.a /home/murray/src/lam-6.5.7/share/mpi/.libs/libmpi.a /home/murray/src/lam-6.5.7/share/mpi/.libs/libmpi.la /home/murray/src/lam-6.5.7/share/mpi/libmpi.la /home/murray/src/lam-6.5.7/share/.libs/libmpi.lai /home/murray/src/lam-6.5.7/share/.libs/libmpi.a /home/murray/src/lam-6.5.7/share/.libs/libmpi.la /home/murray/src/lam-6.5.7/share/libmpi.la /home/murray/src/lam-6.5.7/man/man3/libmpi.3 /home/murray/src/lam-6.5.7/mpi2c++/src/libmpi++.a /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a 
/usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a /home2/aadutton/mm/mg/lib/libmpi.a /home2/dale/parwave2d/wave2d/lib_beowulf/libmpi.a /home2/jfn/software/lam-6.5.6/share/mpi/.libs/libmpi.a /home2/jfn/software/lam-6.5.6/share/mpi/.libs/libmpi.la /home2/jfn/software/lam-6.5.6/share/mpi/libmpi.la /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.lai /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.a /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.la /home2/jfn/software/lam-6.5.6/share/libmpi.la /home2/jfn/software/lam-6.5.6/man/man3/libmpi.3 /home2/jfn/software/lam-6.5.6/mpi2c++/src/libmpi++.a /home2/jfn/software/lam/lib/libmpi.la /home2/jfn/software/lam/lib/libmpi.a /home2/jfn/software/lam/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6/share/mpi/.libs/libmpi.a /home2/suresh/software/lam-6.5.6/share/mpi/.libs/libmpi.la /home2/suresh/software/lam-6.5.6/share/mpi/libmpi.la /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.lai /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.a /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.la /home2/suresh/software/lam-6.5.6/share/libmpi.la /home2/suresh/software/lam-6.5.6/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6/mpi2c++/src/libmpi++.a /home2/suresh/software/lam/lib/libmpi.a /home2/suresh/software/lam/lib/libmpi++.a /home2/suresh/software/lam/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6-install/lib/libmpi.la /home2/suresh/software/lam-6.5.6-install/lib/libmpi.a /home2/suresh/software/lam-6.5.6-install/man/man3/libmpi.3 /home3/wkb/cse/libmpich.a !!ssh root@vnfe3 cat /tmp/libmpi /home/maggie/ildm/libmpich.a /home/maggie/DLRFlame/libmpich.a /home/michalak/mpich-1.2.5/mpid/ch_p4/libmpid.a /home/michalak/mpich-1.2.5/lib/libmpich.a /home/michalak/mpich-1.2.5/lib/libmpichfsup.a /home/stadel/lam-6.3.2/lib/otb-obj/mpi/libmpi.a /home/stadel/lam-6.3.2/man/man3/libmpi.3 /home/stadel/lam-6.3.2/mpi2c++/src/libmpi++.a /home/stadel/lam/lib/libmpi.a 
/home/stadel/lam/lib/libmpi++.a /home/stadel/lam/man/man3/libmpi.3 /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a /usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a ############################################################ Tue Jul 12 16:20:19 PDT 2005 ############################################################ # mariah back from varsity, who installed Windows and found no # performance issues. Will first try installing Mandriva 2005 LE # on whitney (configured as bhtest2) then return to mariah # # whitney # bhtest2.physics.ubc.ca # 142.103.234.191 # As root@bhtest2 # Backup to vnfe4:/home # As root@bhtest2 mkdir -p /d/vnfe4/home mount -a mkdir -p /d/vnfe4/home/whitney cd / cp -a boot backup etc home misc root var /d/vnfe4/home/whitney mkdir -p /d/vnfe4/home/whitney/usr cd /usr cp -a local /d/vnfe4/home/whitney/usr/local # TODO: # As root@bhtest2 cd / du -hs boot backup etc home misc root var 3.7M boot 39G backup 60M etc 62G home 4.0K misc 484M root 62M var cd /usr/local du -hs * 4.0K bin 4.0K etc 4.0K games 4.0K grace 4.0K include 28M lib 4.0K libexec 4.0K mdout 4.0K mpich-1.2.6 4.0K sbin 64K share 4.0K src # As root@vnfe4 cd /home/mariah du -hs boot backup etc home misc root var cd /home/mariah/usr/local du -hs * 17G backup 58M etc 32G home 4.0K misc 3.8M root 64M var # Full on # As root@bh6 cd / test -f /tmp/diff && /bin/rm -f /tmp/diff foreach d (backup etc home misc root var) echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++" echo "+++ diff'ing --- $d ---" echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++" diff -r $d /d/bh6/home/mariah/$d | tee -a /tmp/diff end ############################################################ Sun Jul 17 
16:50:20 PDT 2005 ############################################################ # See matt@bh0:~/system/bh/README for similar details to above # for whitney. Same pitiful performance, so am now trying 10.1 ############################################################ Sun Jul 17 16:51:08 PDT 2005 ############################################################ # Restoring MPI state to something resembling the web documentation # As root@vnfe1 slocate libmpi | tee /tmp/slocate-libmpi !!ssh root@vnfe1 cat /tmp/slocate-libmpi /home/matt/autoconf/lam-6.3/man/man3/libmpi.3 /home/matt/debug/maggie/ildm/libmpich.a /home/murray/lib/libmpi.la /home/murray/lib/libmpi.a /home/murray/src/lam-6.5.7/share/mpi/.libs/libmpi.a /home/murray/src/lam-6.5.7/share/mpi/.libs/libmpi.la /home/murray/src/lam-6.5.7/share/mpi/libmpi.la /home/murray/src/lam-6.5.7/share/.libs/libmpi.lai /home/murray/src/lam-6.5.7/share/.libs/libmpi.a /home/murray/src/lam-6.5.7/share/.libs/libmpi.la /home/murray/src/lam-6.5.7/share/libmpi.la /home/murray/src/lam-6.5.7/man/man3/libmpi.3 /home/murray/src/lam-6.5.7/mpi2c++/src/libmpi++.a /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a /usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a /home2/aadutton/mm/mg/lib/libmpi.a /home2/dale/parwave2d/wave2d/lib_beowulf/libmpi.a /home2/jfn/software/lam-6.5.6/share/mpi/.libs/libmpi.a /home2/jfn/software/lam-6.5.6/share/mpi/.libs/libmpi.la /home2/jfn/software/lam-6.5.6/share/mpi/libmpi.la /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.lai /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.a /home2/jfn/software/lam-6.5.6/share/.libs/libmpi.la /home2/jfn/software/lam-6.5.6/share/libmpi.la /home2/jfn/software/lam-6.5.6/man/man3/libmpi.3 
/home2/jfn/software/lam-6.5.6/mpi2c++/src/libmpi++.a /home2/jfn/software/lam/lib/libmpi.la /home2/jfn/software/lam/lib/libmpi.a /home2/jfn/software/lam/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6/share/mpi/.libs/libmpi.a /home2/suresh/software/lam-6.5.6/share/mpi/.libs/libmpi.la /home2/suresh/software/lam-6.5.6/share/mpi/libmpi.la /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.lai /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.a /home2/suresh/software/lam-6.5.6/share/.libs/libmpi.la /home2/suresh/software/lam-6.5.6/share/libmpi.la /home2/suresh/software/lam-6.5.6/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6/mpi2c++/src/libmpi++.a /home2/suresh/software/lam/lib/libmpi.a /home2/suresh/software/lam/lib/libmpi++.a /home2/suresh/software/lam/man/man3/libmpi.3 /home2/suresh/software/lam-6.5.6-install/lib/libmpi.la /home2/suresh/software/lam-6.5.6-install/lib/libmpi.a /home2/suresh/software/lam-6.5.6-install/man/man3/libmpi.3 /home3/wkb/cse/libmpich.a # As root@vnfe3 slocate libmpi | tee /tmp/slocate-libmpi /home/maggie/ildm/libmpich.a /home/maggie/DLRFlame/libmpich.a /home/michalak/mpich-1.2.5/mpid/ch_p4/libmpid.a /home/michalak/mpich-1.2.5/lib/libmpich.a /home/michalak/mpich-1.2.5/lib/libmpichfsup.a /home/stadel/lam-6.3.2/lib/otb-obj/mpi/libmpi.a /home/stadel/lam-6.3.2/man/man3/libmpi.3 /home/stadel/lam-6.3.2/mpi2c++/src/libmpi++.a /home/stadel/lam/lib/libmpi.a /home/stadel/lam/lib/libmpi++.a /home/stadel/lam/man/man3/libmpi.3 /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a /usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a # vnfe1 /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a 
/usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a /usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a /usr/local/build/LINUX/ch_p4/bin/mpiuninstall /usr/local/mpiuninstall /usr/local/PGI/build/LINUX/ch_p4/bin/mpiuninstall /usr/local/PGI/mpiuninstall /usr/local/intel/sbin/mpiuninstall find / -name libmpi.a | tee /tmp/find-libmpi.a #vnfe3 /usr/local/lib/libmpi.a /usr/local/build/LINUX/ch_p4/lib/libmpich++.a /usr/local/build/LINUX/ch_p4/lib/libmpich.a /usr/local/pgi/linux86/5.2/lib/libmpich.ipl /usr/local/PGI/build/LINUX/ch_p4/lib/libmpich.a /usr/local/PGI/lib/libmpi.a /usr/local/PGI/lib/libmpich_.a /usr/local/intel/lib/libmpich.a /usr/local/intel/lib/libmpichfsup.a /usr/local/intel/lib/libmpichf90.a /usr/local/intel/lib/libmpichf90nc.a /usr/local/build/LINUX/ch_p4/bin/mpiuninstall /usr/local/mpiuninstall /usr/local/PGI/build/LINUX/ch_p4/bin/mpiuninstall /usr/local/PGI/mpiuninstall /usr/local/intel/sbin/mpiuninstall find / -name libmpi.a | tee /tmp/find-libmpi.a !!ssh root@vnfe3.physics.ubc.ca cat /tmp/find-libmpi.a /usr/local/lib/libmpi.a /usr/local/PGI/lib/libmpi.a ls -lt /usr/local/lib/libmpi.a ls -lt /usr/local/PGI/lib/libmpi.a # Dangling sym links RM /usr/local/lib/libmpi.a RM /usr/local/PGI/lib/libmpi.a # Get most recent version (1.2.7) of mpich http://www-unix.mcs.anl.gov/mpi/mpich/ # Do by hand on one machine, then write script # Environment variables CC FC # As root@vn1 cdi tar zxf ~matt/autoconf/mpich-1.2.7.tar.gz !!ssh root@vn1 cat /tmp/help Configuring with args --help Configuring MPICH Version 1.2.7 (release) of : 2005/06/22 16:33:49 Usage: ./configure [--with-arch=ARCH_TYPE] [--with-comm=COMM_TYPE] [--with-device=DEVICE] [--with-mpe] [--without-mpe] [--without-romio] [--disable-f77] [--disable-f90] [--with-f90nag] [--with-f95nag] [--disable-f90modules] 
[--disable-gencat] [--disable-doc] [--enable-cxx ] [--disable-cxx] [--with-coll[=filename]] [--enable-mpedbg] [--disable-mpedbg] [--enable-devdebug] [--disable-devdebug] [--enable-debug] [--disable-debug] [--enable-traceback] [--disable-traceback] [--enable-long-long] [--disable-long-long] [--enable-long-double] [--disable-long-double] [-prefix=INSTALL_DIR] [-c++[=C++_COMPILER] ] [noc++] [-opt=OPTFLAGS] [-cc=C_COMPILER] [-fc=FORTRAN_COMPILER] [-clinker=C_LINKER] [-flinker=FORTRAN_LINKER] [-c++linker=CC_LINKER] [-cflags=CFLAGS] [-fflags=FFLAGS] [-c++flags=CCFLAGS] [-optcc=C_OPTFLAGS] [-optf77=F77_OPTFLAGS] [-f90=F90_COMPILER] [-f90flags=F90_FLAGS] [-f90inc=INCLUDE_DIRECTORY_SPEC_FORMAT_FOR_F90] [-f90linker=F90_LINKER] [-f90libpath=LIBRARY_PATH_SPEC_FORMAT_FOR_F90] [-lib=LIBRARY] [-mpilibname=MPINAME] [-mpe_opts=MPE_OPTS] [-make=MAKEPGM ] [-memdebug] [-ptrdebug] [-tracing] [-dlast] [-listener_sig=SIGNAL_NAME] [-cross] [-adi_collective] [-automountfix=AUTOMOUNTFIX] [-noranlib] [-ar_nolocal] [-rsh=RSHCOMMAND] [-rshnol] [-file_system=FILE_SYSTEM] [-p4_opts=P4_OPTS] where ARCH_TYPE = the type of machine that MPI is to be configured for COMM_TYPE = communications layer or option to be used DEVICE = communications device to be used INSTALL_DIR = directory where MPI will be installed (optional) MPE_OPTS = options to pass to the mpe configure P4_OPTS = options to pass to the P4 configure (device=ch_p4) C++_COMPILER = default is to use xlC, g++, or CC (optional) OPTFLAGS = optimization flags to give the compilers (e.g. -g) CFLAGS = flags to give C compiler FFLAGS = flags to give Fortran compiler MAKEPGM = version of make to use LENGTH = Length of message at which ADI switches from short to long message protocol AUTOMOUNTFIX = Command to fix automounters RSHCOMMAND = Command to use for remote shell MPILIBNAME = Name to use instead of mpich in the name of the MPI library. If set, libMPILIBNAME will be used instead or libmpich. 
This can be used on systems with several different MPI implementations. FILE_SYSTEM = name of the file system ROMIO is to use. Currently supported values are nfs, ufs, pfs (Intel), piofs (IBM), hfs (HP), sfs (NEC), and xfs (SGI). SIGNAL_NAME = name of the signal for the P4 (device=ch_p4) device to use to indicate that a new connection is needed. By default, it is SIGUSR1. All arguments are optional, but if 'arch', 'comm', or 'prefix' arguments are provided, there must be only one. 'arch' must be specified before 'comm' if they both appear. Packages that may be included with MPICH --with-device=name - Use the named device for communication. Known names include ch_p4, ch_mpl, ch_shmem, and globus2. If not specified, a default is chosen. Special options for the device are specified after the device name, separated by a colon. E.g., --with-device=globus2:-flavor=mpi,nothreads --with-romio[=OPTIONS] - Use ROMIO to provide MPI-I/O from MPI-2 (default). The options include --with-file-system=FSTYPE, where fstype can be any combination of nfs, ufs, pfs (intel), piofs (IBM), hfs (HP), sfs (NEC), and xfs (SGI), combined with '+'. If romio is not included, the Fortran 90 modules cannot be built. --with-mpe - Build the MPE environment (default) --with-coll[=name] - Select the implementation of the MPI collective routines that should be used. By default, uses the most advanced version. The name is the name of the file (without the suffix) in src/coll that should be used for the collective routines. Currently, name may be either intra_fns_new (the default) or intra_fns . --with-f90nag - Choose the NAG f90 compiler for Fortran (preliminary version intended for use *instead* of a Fortran 77 compiler) --with-f95nag - Choose the NAG f95 compiler for Fortran --with-cross=file - Use the file for cross compilation. The file should contain assignments of the form CROSS_SIZEOF_INT=4 for each cross compilation variable. 
The command egrep 'CROSS_[A-Z_]*=' configure | sed 's/=.*//g' will list each variable. You can use --without- to turn off a feature (except for device). Options for device ch_lfshmem: --with-device=ch_lfshmem[:-usesysv] The option '-usesysv' applies to the ch_shmem device, and causes the device to attempt and use System V shared memory and semaphore routines, rather than what would be chosen by default (often mmap or a system-specific method). Options for device ch_meiko: --with-device=ch_meiko Options for device ch_mpl: --with-device=ch_mpl Options for device ch_p4: --with-device=ch_p4[:-listener_sig=SIGNALNAME][-dlast][-socksize=BYTES] The option '-listener_sig' applies to the ch_p4 device, and changes the signal that is used to signal that a new connection should be made. By default, SIGUSR1 is used. The option '-dlast' causes the p4 device to remember the last few debugging messages, printing them out only when the job terminates abnormally. The option '-socksize' changes the size of the socket buffers used. Options for device ch_p4mpd: --with-device=ch_p4mpd[:-listener_sig=SIGNALNAME][-dlast][-socksize=BYTES] The option '-listener_sig' applies to the ch_p4mpd device, and changes the signal that is used to signal that a new connection should be made. By default, SIGUSR1 is used. The option '-dlast' causes the p4 device to remember the last few debugging messages, printing them out only when the job terminates abnormally. The option '-socksize' changes the size of the socket buffers used. Options for device ch_shmem: --with-device=ch_shmem[:-usesysv] The option '-usesysv' applies to the ch_shmem device, and causes the device to attempt and use System V shared memory and semaphore routines, rather than what would be chosen by default (often mmap or a system-specific method). 
Options for device globus2: *# GLOBUS_INSTALL_PATH must be set Features that may be included with MPICH --enable-cxx - Build C++ interfaces to the MPI-1 routines (default) --enable-f77 - Build Fortran 77 interfaces to the MPI routines (default) --enable-weak-symbols - Use weak symbols for MPI/PMPI routines. This uses weak symbols, if available, for the profiling interface (default) --enable-debug - Enable support for debuggers to access message queues --enable-traceback - Enable printing of a call stack when MPI and the user's program is built with certain compilers (currently only some versions of gcc are supported). --enable-mpedbg - Enable the -mpedbg command-line argument (e.g., errors can start an xterm running a debugger). Only works with some workstation systems. --enable-sharedlib - Attempt to build shared libraries. Static --enable-sharedlib=dir libraries are always built. If a directory is specified, the shared libraries will be placed in that directory. This can be used to place the shared libraries in a uniform location in local disks on a cluster. --enable-f90modules - Build Fortran 90 module support (default if a Fortran 90 or 95 compiler is found). If ROMIO is not built, no Fortran 90 modules will be built. The following are intended for MPI implementors and debugging of configure --enable-strict - Try and build MPICH using strict options in Gnu gcc --enable-echo - Cause configure to echo what it does --enable-devdebug - Enable debugging code in the ADI. You can use --disable- to turn off a feature. Notes on configure usage: The suggestions for GNU configure usage suggest that configure not be used to build different tools, only controlling some basics of the features enabled or the packages included. Our use of configure does not follow these rules because configure is too useful but we need the flexibility that allows the user to produce variations of MPICH. 
More notes on command-line parameters: You can select a different C and Fortran compiler by using the '-cc' and 'fc' switches. The environment variables 'CC' and 'FC' can also provide values for these but their settings may be overridden by the configure script. Using '-cc=$CC -fc=$FC' will force configure to use those compilers. The option '-opt' allows you to specify optimization options for the compilers (both C and Fortran). For example, '-opt=-O' chooses optimized code generation on many systems. '-optcc' and '-optf77' allow you to specify options for just the C or Fortran compilers. Use -cflags and -fflags for options not related to optimization. Note that the '-opt' options are not passed to the 'mpicc', 'mpif77', 'mpicxx', and 'mpif90' scripts. The '-opt' options are used only in building MPICH. The option '-lib' allows you to specify the location of a library that may be needed by a particular device. Most devices do NOT need this option; check the installation instructions for those that might. The option '-make' may be used to select an alternate make program. For example, to make use of VPATH builds (building in one directory with the source in a different directory), -make=gnumake may be required. The option '--disable-short-longs' may be used to suppress support for the C types 'long long' (a common extension) and 'long double' (ANSI/ISO C) when they are the same size as 'long' and 'double' respectively. Some systems allow these long C types, but generate a warning message when they are used; this option may be used to suppress these messages (and support for these types). '--disable-long-long' disables just 'long long'; '--disable-long-double' disables just 'long double'. The option '-ar_nolocal' prevents the library archive command from attempting to use the local directory for temporary space. 
This option should be used when (a) there isn't much space (less than 20 MB) available in the partition where MPICH resides and (b) there is enough space in /tmp (or wherever ar places temporary files by default). The option '-noranlib' causes the 'ranlib' step (needed on some systems to build an object library) to be skipped. This is particularly useful on systems where 'ranlib' is optional (allowed but not needed; because it is allowed, configure chooses to use it just in case) but can fail (some 'ranlib's are implemented as scripts using 'ar'; if they don't use the local directory, they can fail (destroying the library in the process) if the temporary directory (usually '/tmp') does not have enough space. This has occured on some OSF systems. The environment variable 'RSHCOMMAND' allows you to select an alternative remote shell command (by default, configure will use 'rsh' or 'remsh' from your 'PATH'). If your remote shell command does not support the '-l' option (some AFS versions of 'rsh' have this bug), also give the option '-rshnol'. These options are useful only when building a network version of MPICH (e.g., '--with-device=ch_p4'). The configure option '-rsh' is supported for backward compatibility. Special Tuning Options: There are a number of options for tuning the behavoir of the ADI (Abstract Device Interface) which is the low-level message-passing interface. These should NOT be used unless you are sure you know what you are doing. The option '-pkt_size=LENGTH' allows you to choose the message length at which the ADI (Abstract Device Interface) switches from its short to long message format. LENGTH must be positive. The option '-adi_collective' allows the ADI to provide some collective operations in addition to the basic point-to-point operations. Currently, most systems do not support this option (it is ignored) and on the others it has not been extensively tested. 
Options for Experts: The option '-memdebug' enables extensive internal memory debugging code. This should be used only if you are trying to find a memory problem (it can be used to help find memory problems in user code as well). Running programs with the option '-mpidb memdump' will produce a summary, when 'MPI_Finalize' is called, of all unfreed memory allocated my MPI. For example, a user-created datatype that was not later freed would be reported. The option '-tracing' enables tracing of internal calls. This should be used only for debugging the MPICH implementation itself. The option '-dlast' enables tracing of the most recent operations performed by the device. These can be output when a signal (like SIGINT), error, or call to a special routine occurs. There is a performance penalty for this option, but it can be very useful for implementors attempting to debug problems. Sample Configure Usage: To make for running on Sun's running Solaris with ch_p4 as the device, and with the installation directory equal to the current directory: ./configure --with-device=ch_p4 --with-arch=solaris make Known devices are ch_nx (native Intel NX calls), ch_mpl (native IBM EUI or MPL calls), ch_p4 (p4) ch_p4mpd (p4 with the MPD startup system) globus2 (Globus: globus_io/vMPI) ch_meiko (for Meiko CS2, using NX compatibility library), ch_shmem (for shared memory systems, such as SMPs), ch_lfshmem(for shared memory systems, such as SMPs; uses lock-free message buffers), ch_cenju3 (native NEC Cenju-3 calls) ch_gm (native Myrinet GM, distributed and supported by Myricom) The following devices were supported with ADI-1, but are currently unsupported. Please contact us if you are interested in helping us support these devices: meiko (for Meiko CS2, using elan tport library), and nx (for Intel Paragon), t3d (for the Cray T3D, using Cray shmem library). 
ch_nc (native nCUBE calls, requires -arch=ncube), ch_cmmd (native TMC CM-5 CMMD calls) These are no longer distributed with the MPICH distribution. Known architectures include (case is important) alpha (Compaq alpha) CRAY (CRAY XMP, YMP, C90, J90, T90) cray_t3d (CRAY T3D) CYGWIN_NT (PCs using Cygwin) EWS_UX_V (NEC EWS4800/360AD Series workstation. Untested.) freebsd (PC clones running FreeBSD) hpux (HP UX) intelnx (Intel i860 or Intel Delta) IRIX (synonym for sgi) IRIX32 (IRIX with 32bit objects -32) IRIXN32 (IRIX with -n32) IRIX64 (IRIX with 64bit objects) ksr (Kendall Square KSR1 and KSR2) LINUX (PC clones running LINUX) LINUX_ALPHA (Linux on Alpha processors) meiko (Meiko CS2) netbsd (PC clones running NetBSD) paragon (Intel Paragon) rs6000 (AIX for IBM RS6000) sgi (Silicon Graphics IRIX 4.x, 5.x or 6.x) sgi5 (Silicon Graphics IRIX 5.x on R4400's, for the MESHINE) solaris (Solaris) solaris86 (Solaris on Intel platforms) sppux (SPP UX) sun4 (SUN OS 4.x) SX_4_float0 (NEC SX-4; Floating point format float0 Conforms IEEE 754 standard. C: sizeof (int) = 4; sizeof (float) = 4 FORTRAN: sizeof (INTEGER) = 4; sizeof (REAL) = 4) SX_4_float1 (NEC SX-4; Floating point format float1 IBM floating point format. C: sizeof (int) = 4; sizeof (float) = 4 FORTRAN: sizeof (INTEGER) = 4; sizeof (REAL) = 4) SX_4_float2 (NEC SX-4; Floating point format float2 CRAY floating point format. C: sizeof (int) = 4; sizeof (float) = 8 FORTRAN: sizeof (INTEGER) = 8; sizeof (REAL) = 8) !!! WARNING !!! This version will not run together with FORTRAN routines. sizeof (INTEGER) != sizeof (int) SX_4_float2_int64 (NEC SX-4; Floating point format float2 and 64-bit int's) C: sizeof (int) = 8; sizeof (float) = 8 FORTRAN: sizeof (INTEGER) = 8; sizeof (REAL) = 8) tflops (Intel TFLOPS) UXPM (UXP/M. Untested.) uxpv (uxp/v. Untested.) Others may be recognized. 
Special notes: For SGI (--with-arch=IRIX) multiprocessors running the ch_p4 device, use -comm=ch_p4 to disable the use of the shared-memory p4 communication device, and -comm=shared to enable the shared-memory p4 communication device. The default is to enable the shared-memory communication device. # Coded matt@vnfe1:~/scripts/vnMPIinstall.gnu # As root@vn1 vnMPIinstall.gnu vnMPIinstall.gnu: Configuration/build finished. Log install.gnu.log # Looks OK, check library link, ability to compile and run on vn1 # As matt@vn1, and after hacking cpi-mpi-gnu from cpi-mpi-pgi, editing # Makefile # As matt@vn1 cd /d/vnfe1/home/matt/examples/cpi-mpi-gnu make # superficially OK, so remove and reinstall on ALL machines after scanning # all for installed versions ssh root@vn1 /usr/local/sbin/mpiuninstall vnCommand 'which vnMPIinstall.gnu' # OK vnallbgCommand 'find / -name libmpi.a | tee /tmp/find-libmpi.a' # TODO: vnallbgCommand 'vnMPIinstall.gnu' # Hacked vnMPIinstall.pgi from vnMPIinstall.gnu vnallCommand 'which vnMPIinstall.pgi' # As root@vn10 vnMPIinstall.pgi # As matt@vn10 cd examples cd cpi-mpi-pgi make # OK ssh root@vn10 /usr/local/PGI/sbin/mpiuninstall vnallbgCommand 'vnMPIinstall.pgi' # Hacked vnMPIinstall.intel from vnMPIinstall.gnu vnallbgCommand '/usr/local/intel/sbin/mpiuninstall' vnallCommand 'which vnMPIinstall.intel' # As root@vn19 vnMPIinstall.intel # TODO: Check, then # TODO: vnallbgCommand 'vnMPIinstall.intel' # TODO: PROBLEM WITH COMPILATION, INSTALLATION # Ah yes, the good old 'pragma' business. Someone should be # hung out by their genitalia for this sort of sh^& (i.e. 
# trying to do the work of a translator in a preprocessor # vs something like 'mfi') ./mpich-1.2.7/src/fortran/src/abortf.c # Pass abortf.c through PGI/INTEL cpp # Disable all the pragma crap and replace with void mpi_abort_ ( MPI_Fint * comm , MPI_Fint * errorcode , MPI_Fint * __ierr ) ; # Intel compiler gets confused by #pragma's left after pre-processing, so with # luck, following fix will work !!ssh -x root@vn19.physics.ubc.ca cat /tmp/m #.c.o: #${C_COMPILE} -c $< .c.o: $(C_COMPILE) -E $*.c | grep -v '#pragma' > $*.i $(C_COMPILE) -c $*.i > $*.o .c.i: $(C_COMPILE) -E $*.c > $*.i .i.o: $(C_COMPILE) -c $*.i > $*.o clean: /bin/rm *.i # As root@vn19 cd /var/tmp/install/intel/mpich-1.2.7 make # Looks OK, except for -O2 (whither?) and -O3 etc. conflict make install # As matt@vn19 cd examples/cpi-mpi-intel make clean; make # OK scp -q root@vn19.physics.ubc.ca:/usr/tmp/install/intel/mpich-1.2.7/src/fortran/src/Makefile . # As matt@bh0 cda cd mpich-1.2.7 make clean cd .. cp -a mpich-1.2.7 mpich-1.2.7-UBCp0 cd !$/src/fortran/src Bu Makefile scp -q root@vn19.physics.ubc.ca:/usr/tmp/install/intel/mpich-1.2.7/src/fortran/src/Makefile . # Packaged, exported, and ready to scp! scp matt@bh0.physics.ubc.ca:/d/bh0/home/matt/autoconf/mpich-1.2.7-UBCp0.tar.gz . :!scp -q root@vn19.physics.ubc.ca:/usr/tmp/install/intel/mpich-1.2.7-UBCp0/src/fortran/src/Makefile.in . # As matt@vnfe1 cds Arc vnMPIinstall.intel vi !$ PACK=mpich-1.2.7-UBCp0 # As root@vn19 /usr/local/intel/sbin/mpiuninstall vnMPIinstall.intel # ... whoops, trashed configuration with 'E', next time simply cp to /d/vnfe1/home/matt/autoconf # As matt@bh0 cda RM -rf mpich-1.2.7-UBCp0 cp -a mpich-1.2.7 mpich-1.2.7-UBCp0 cd !$/src/fortran/src CP /d/vnfe1/home/matt/system/vn/Makefile.in .
cda RM mpich-1.2.7-UBCp0.tar.gz tar czf mpich-1.2.7-UBCp0.tar.gz mpich-1.2.7-UBCp0 CP mpich-1.2.7-UBCp0.tar.gz /d/vnfe1/home/matt/autoconf # As root@vn19 vnMPIinstall.intel # OK # As matt@vn19 cd examples/cpi-mpi-intel; make clean; make # OK ssh root@vn19 /usr/local/intel/sbin/mpiuninstall vnallbgCommand vnMPIinstall.intel # Hacked vnCommand to log in as matt # GNU vnCommand 'cd /d/vnfe1/home/matt/examples/cpi-mpi-gnu; make clean; make' # TODO: OK? # Intel # TODO vnCommand 'cd /d/vnfe1/home/matt/examples/cpi-mpi-intel; make clean; make' # PGI vnCommand 'cd /d/vnfe1/home/matt/examples/cpi-mpi-pgi; make clean; make' # TODO: OK? # TODO: Verify veracity of on-line examples, update /etc/motd, web page # C/intel: ?? # F77/intel: ?? # C/pgi: NOPE /home/matt/work/cpi-mpi-pgi vnrun -n 4 cpi Will execute 'ssh -x vn37 cd /d/vnfe1/home/matt/work/cpi-mpi-pgi; time mpirun -np 4 -machinefile mfile cpi ' Will use the following machine file vn20 vn21 vn22 /d/vnfe1/home/matt/work/cpi-mpi-pgi/cpi: /usr/local/pgi/linux86/5.2/lib/libpthread.so.0: version `GLIBC_2.3.3' not found (required by /lib/tls/librt.so.1) 0.211u 0.348s 0:02.24 24.5% 0+0k 0+0io 0pf+0w # As matt ldd cpi ./cpi: /usr/local/pgi/linux86/5.2/lib/libpthread.so.0: version `GLIBC_2.3.3' not found (required by /lib/tls/librt.so.1) linux-gate.so.1 => (0xffffe000) libpthread.so.0 => /usr/local/pgi/linux86/5.2/lib/libpthread.so.0 (0x40016000) librt.so.1 => /lib/tls/librt.so.1 (0x4007b000) libc.so.6 => /lib/tls/libc.so.6 (0x4008f000) libpgc.so => /usr/local/pgi/linux86/5.2/lib/libpgc.so (0x401ae000) libm.so.6 => /lib/tls/libm.so.6 (0x401c3000) /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000) vnallbgCommand updatedb slocate 2.3.3 # Try relocating /usr/local/pgi/linux86/5.2/lib/libpthread.so.0 mv /usr/local/pgi/linux86/5.2/lib/libpthread.so.0 /usr/local/pgi/linux86/5.2/lib/libpthread.so.0.PGI # ... 
and indeed, that seems to work vnallbgCommand 'mv /usr/local/pgi/linux86/5.2/lib/libpthread.so.0 /usr/local/pgi/linux86/5.2/lib/libpthread.so.0.PGI' viw vnCommand vnCommand 'setenv X cpi-mpi-pgi; mkdir -p ~/work; cd ~/work; test -d $X && /bin/rm -rf $X; cp -a ~matt/examples/$X .; cd $X; make clean; make; make run4' | tee /tmp/cpi-mpi-pgi vnCommand 'setenv X cpi-mpi-intel; mkdir -p ~/work; cd ~/work; test -d $X && /bin/rm -rf $X; cp -a ~matt/examples/$X .; cd $X; make clean; make; make run4' | tee /tmp/cpi-mpi-intel vnCommand 'setenv X cpi-mpi-gnu; mkdir -p ~/work; cd ~/work; test -d $X && /bin/rm -rf $X; cp -a ~matt/examples/$X .; cd $X; make clean; make; make run4' | tee /tmp/cpi-mpi-gnu # OK?? (not completely, may be "bad node"?) # NOT CLEAR WHAT HAPPENED HERE, BUT WILL DEEM TRANSIENT # Check all NFS mounts vnCommand df | tee /tmp/df vnCommand 'cd ~matt/examples; ls' | tee /tmp/examples # Nothing abnormal ... scan cdex # Master vn1 Scan # ... seems OK ... # cpi-mpi-pgi make run4 make run40 # TODO: OK? # TODO: EXPLICITLY VERIFY ONLINE MATERIAL AND UPDATE WEB PAGE /etc/motd # TODO: Intel Fortran is broken ... 
############################################################ Thu Jul 21 16:51:02 PDT 2005 ############################################################ foreach u (ajpenner) vnallbgCommand "ps -elf | grep $u | grep -v grep | nth 4 | pre kill -9 | csh" end ############################################################ Sat Jul 30 07:49:40 PDT 2005 ############################################################ vn22 down 18:45 vn29 down 18:43 vn32 down 18:42 vn41 down 19:07 # See README.CRASH (CRASH_186 CRASH_187 CRASH_188 CRASH_189) ############################################################ Sun Aug 28 12:06:01 PDT 2005 ############################################################ # Need to update # # /etc/fstab # /Public # # as root@vnfe[13] # As matt@vnfe1 vnfeCommand 'mkdir -p /d/bh0/home2' # As root@vnfe1 cd /etc; Bu fstab vi fstab bh0:/home /d/bh0/home nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 bh0:/home2 /d/bh0/home2 nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 mount -a /bin/rm -f /Public ln -s /d/bh0/home2/home/laplace/usr2/Public /Public # As root@vnfe3 cd /etc; Bu fstab vi fstab bh0:/home /d/bh0/home nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 bh0:/home2 /d/bh0/home2 nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 #bh9:/home /d/bh9/home nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 mount -a /bin/rm -f /Public ln -s /d/bh0/home2/home/laplace/usr2/Public /Public # OK ############################################################ Mon Aug 29 14:35:53 PDT 2005 ############################################################ # Scott reports vnfe3:/home is full # As root@vnfe3 cd /home usage 3399584 suqin 1571284 tli 1431440 maggie 825736 pougatch 695992 fengxs 489576 jinbei 374560 atsci 311708 clarson # As root@vnfe3 mkdir -p /d/vnfe4/home/atsci/vnfe3/home mv /d/vnfe3/home/atsci /d/vnfe4/home/atsci/vnfe3/home # As matt@bh0 etc make import vi passwd atsci:!:9001:9000:Josh Hacker:/d/vnfe4/home/atsci/vnfe3/home/atsci:/bin/tcsh vnDistEtc passwd vnallbgCommand 'mount -a' 
vnCommand 'ls -ltd ~atsci' | tee /tmp/ls-atsci # OK # As matt@bh0 pine -f vn # Updated vnfe3-hogs list and sent message # End-of-incident # As root@vn5 jj ntp vnSetdate # vn16 down? # YUP, STRANGE, EH? # See README.CRASH (CRASH_190) ########################################################### Wed Sep 7 11:09:35 PDT 2005 ############################################################ # CRASH_191 # Wed Sep 7 11:08:51 PDT 2005 # vn28, noticed by Ben, serviced promptly by Ben and # Jason (power supply) back on line # See README.CRASH (CRASH_191) Wed Sep 7 11:09:43 PDT 2005 # As root@vn16 ntptimeset df # Who, if anyone, was running viw whowason # Changed pattern to grep from ': $node' # to ':[ ]* $node' whowason vn25 # shows nothing but root, idle, sshd etc., so we lucked out # and didn't trash any computations ########################################################### Sat Sep 10 14:57:44 PDT 2005 ########################################################### # Ben reports problem with vn41 # as matt@vn41 whowason vn41 # Half a bazillion or more cron-initiated jobs (postfix/sendmail # ...?) --- should get the crontab cleaned up. Got it under # control by killing off a lot of processes, load average # seems to be coming down from a few hundred to something # reasonable, and perhaps won't need a reboot. # Check logs ...
# For some reason / is read-only, WILL need to reboot # As root@vn41 reboot # See README.CRASH (CRASH_192), some freakish software thing # that got / into read-only, could be disk errors since such # were found and corrected in subsequent incident handling ########################################################### Tue Sep 13 08:40:02 PDT 2005 THIS IS ONLY A TEST ########################################################### ############################################################ Sat Sep 17 09:48:33 PDT 2005 ############################################################ # TODO: New account for Evgeny Sorkin vi README.NEWUSERS 801 evgeny # nu setenv NU evgeny cat<<END>${NU} evgeny:x:801:600:Evgeny Sorkin:/d/vnfe1/home/evgeny:/bin/tcsh END vnNewUsers ${NU} #As root@vnfe1 cp ~phys410/{.cshrc,.aliases} ~${NU}; chown -R evgeny.choptuik ~${NU} ssh ${NU}@vnfe1 ssh ${NU}@vn35 date # OK etc sola; vs # 90MvigzBf1UTE vnDistEtc shadow # TODO: # ssh root@vnfe1 "cd ~${NU}; echo evgeny@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" ############################################################ Wed Sep 28 09:38:16 PDT 2005 ############################################################ # New account for Joerg.
Give him his own group, # rottler # As matt@vnfe1 etc sola get group vi group rottler::1100: vnDistEtc group # As choptuik@physics sudo pwentry jrottler:x:811:307:Joerg Rottler:/home/jrottler:/bin/tcsh jrottler:wHGjwrZklw.J2:12970:::::: grep \:811\: /etc/passwd vi README.NEWUSERS 811 jrottler nu setenv NU jrottler cat<<END>${NU} jrottler:x:811:1100:Joerg Rottler:/d/vnfe1/home/jrottler:/bin/tcsh END vnNewUsers ${NU} # As root@vnfe1 setenv NU jrottler cp ~phys410/{.cshrc,.aliases} ~${NU}; chown -R jrottler.rottler ~${NU} ssh ${NU}@vnfe1 ssh ${NU}@vn35 date # OK etc sola; vs # wHGjwrZklw.J2 vnDistEtc shadow ssh root@vnfe1 "cd ~${NU}; echo jrottler@physics.ubc.ca > .forward; chown ${NU}.other .forward; ls -al; finger ${NU}" # DUPLICATED ACCOUNT ON vnfe4 # As matt@vnfe1 nu cd Blurbs ############################################################ Fri Sep 30 12:24:04 PDT 2005 ############################################################ # New account for Tonatiuh Matos (UBC PHAS Visiting Faculty Choptuik) tmatos:x:829:307:Tonatiuh Matos:/home/tmatos:/bin/tcsh tmatos:ZUnTRjFig068U:13024:::::: # As matt@vnfe1 cd vn Arc README.USERS vi README.USERS 829 tmatos # Tonatiuh Matos (UBC PHAS Visiting Faculty Choptuik) nu setenv U tmatos cat<<END>${U} ${U}:x:829:600:Tonatiuh Matos:/d/vnfe1/home/${U}:/bin/tcsh END vnNewUsers ${U} # As root@vnfe1 setenv U tmatos setenv G choptuik cp ~phys410/.cshrc ~${U}; chown ${U}.${G} ~${U}/.cshrc ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # As matt@vnfe1 etc sola; vs # ZUnTRjFig068U vnDistEtc shadow setenv U tmatos setenv G choptuik ssh root@vnfe1 "cd ~${U}; echo ${U}@physics.ubc.ca > .forward; chown ${U}.${G} .forward; ls -al; finger ${U}" vnCommand finger ${U} # OK ############################################################ # New account for Jaskeerat Makkar (UBC PHAS UG Choptuik) ############################################################ jmakkar:x:12610:400:Jaskeerat Makkar:/home2/jmakkar:/bin/tcsh jmakkar:w2LTo6AzQDVkg:11572::::::-1 # As matt@vnfe1 cd vn Arc README.USERS
vi README.USERS 12610 jmakkar # Jaskeerat Makkar (UBC PHAS UG Choptuik) nu setenv U jmakkar cat<<END>${U} ${U}:x:12610:600:Jaskeerat Makkar:/d/vnfe1/home/${U}:/bin/tcsh END vnNewUsers ${U} # As root@vnfe1 setenv U jmakkar setenv G choptuik cp ~phys410/.cshrc ~${U}; chown ${U}.${G} ~${U}/.cshrc ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # As matt@vnfe1 etc sola; vs # w2LTo6AzQDVkg vnDistEtc shadow setenv U jmakkar setenv G choptuik ssh root@vnfe1 "cd ~${U}; echo ${U}@physics.ubc.ca > .forward; chown ${U}.${G} .forward; ls -al; finger ${U}" vnCommand finger ${U} # OK ############################################################ Fri Sep 30 16:08:17 PDT 2005 ############################################################ #CRASH_193 Fri Sep 30 16:04:35 PDT 2005 # In machine room vn6 down 1+04:10 # KVM:2 on vn6 # Looks like a P/S ... fan, but no disk light, decable, # send message to Jason and Ben # See README.CRASH (CRASH_193) ############################################################ Mon Oct 3 14:43:02 PDT 2005 ############################################################ # /etc on vn41 is read only, hope that we can reboot # immediately! # As root@vn41 reboot # Hook it up to KVM:2 # Needed hard reboot, and had filesystem errors that seem to # be fixed up whowason vn41 # As root@vn41 jj ntp # OK, ntptimeset Your clock is off by 0.0319875 seconds.
(142.103.237.225) [15/15] # OK # TODO: get bh9 out of /etc/fstab on nodes # See README.CRASH (CRASH_194) vnDistEtc passwd shadow group hosts.allow hosts.deny ############################################################ Mon Oct 3 14:32:19 PDT 2005 ############################################################ # New account for Scott Cheng-Hsin Yang, UBC PHAS UG (Rottler) # # requested firekg, but second choice is chsy, which is # what he is on physics ############################################################ # From physics chsy:x:13251:400:Cheng Hsin Scott Yang:/home2/chsy:/bin/tcsh chsy:rWcX.xReFa.j2:12299:::::: # As matt@vnfe1 cd vn Arc README.USERS vi README.USERS 13251 chsy # Scott Cheng-Hsin Yang, UBC PHAS UG (Rottler) nu setenv U chsy cat<${U} ${U}:x:13251:1100:Scott Cheng-Hsin Yang:/d/vnfe1/home/${U}:/bin/tcsh END vnNewUsers ${U} # As root@vnfe1 setenv U chsy setenv G rottler cp ~phys410/.cshrc ~${U}; chown ${U}.${G} ~${U}/.cshrc ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # As matt@vnfe1 etc sola; vs # rWcX.xReFa.j2 vnDistEtc shadow setenv U chsy setenv G rottler ssh root@vnfe1 "cd ~${U}; echo ${U}@physics.ubc.ca > .forward; chown ${U}.${G} .forward; ls -al; finger ${U}" vnCommand finger ${U} # OK # As matt@vnfe1 nu cd Blurbs cp jrottler chsy vi !$ scp chsy matt@bh0:/tmp # As matt@bh0- pine ############################################################ # New account for Alex Shyr UBC PHAS UG (Rottler) # # requested shyr as on physics # what he is on physics ############################################################ # From physics # As choptuik@physics shyr:x:12807:400:Alex Yu Jen Shyr:/home2/shyr:/bin/tcsh shyr:GXB0H0KD.jG/M:11579::::::-1 # As matt@vnfe1 cd vn Arc README.USERS vi README.USERS 12807 shyr # Scott Cheng-Hsin Yang, UBC PHAS UG (Rottler) nu setenv U shyr cat<${U} ${U}:x:12807:1100:Alex Yu Jen Shyr:/d/vnfe1/home/${U}:/bin/tcsh END vnNewUsers ${U} # As root@vnfe1 setenv U shyr setenv G rottler cp ~phys410/.cshrc ~${U}; chown ${U}.${G} ~${U}/.cshrc ssh 
${U}@vnfe1 ssh ${U}@vn35 # OK # As matt@vnfe1 etc sola; vs # GXB0H0KD.jG/M vnDistEtc shadow setenv U shyr setenv G rottler ssh root@vnfe1 "cd ~${U}; echo ${U}@physics.ubc.ca > .forward; chown ${U}.${G} .forward; ls -al; finger ${U}" vnCommand finger ${U} # OK # Duplicate on C2 # As matt@vnfe1 nu cd Blurbs cp chsy shyr vi !$ scp shyr matt@bh0:/tmp # As matt@bh0- pine vnallCommand "grep '^shyr' /etc/shadow; grep '^chsy' /etc/shadow" ############################################################ Thu Oct 6 07:31:48 PDT 2005 ############################################################ # TODO: vn41 down again, swap identity with vn62 # See README.CRASH (CRASH_195) # vn62 (old vn41) comes up, logs indicate kernel bug # Suspect memory. Have Jason/Ben swap memory with vn63/64 ############################################################ ############################################################ Wed Oct 12 06:44:58 PDT 2005 ############################################################ down vn62 down 4+02:37 # So memory swap apparently didn't work. viw vnN cds; make export # disable vn62 etc cp motd.2005.10.07 motd.2005.10.12 vi !$ CP !$ motd vnDistEtc motd # See README.CRASH (CRASH_196) ############################################################ Sat Oct 15 06:41:56 PDT 2005 ############################################################ # Have vn62 in Henn 403 and will configure/test as bh16 # HN: bh16.physics.ubc.ca # IP: 142.103.234.72 # DNS: 142.103.236.1 # GW: 142.103.234.254 # NM: 255.255.255.0 drakconf reboot # Checking for new hardware takes LONG time! 
# In fact, is hanging on drakconf, will wait until have some # boot floppies, then will install 2006 ############################################################ Mon Oct 17 07:08:22 PDT 2005 ############################################################ # Installing :w whitney vn64 mariah vn63 ############################################################ Mon Oct 17 13:28:20 PDT 2005 ############################################################ # Reseting Steve P's password to its physics.ubc.ca value # As choptuik@physics.ubc.ca sudo pwentry steve:x:412:307:Steven Plotkin:/home/steve:/bin/bash steve:yEmUKu1Xevoos:11736::::::-1 # As matt@vnfe1 etc sola get shadow ############################################################ Wed Oct 19 10:37:00 PDT 2005 ############################################################ # Installing infamous "second drive", then will move the # information off of it, and "retire" it (mount it in wh0?) # As root@vn61 shutdown -h now # In machine room with Jason, who installs second disk in # v61 # on KVM:2 # No joy, recognized at boot, but then hangs, Jason lugs back # to Henn and will continue after Robb Mann's talk ############################################################ Thu Oct 20 13:52:35 PDT 2005 ############################################################ # Tony from Varsity came to machine room at 13:00 with # BIOS update floppy. BIOS's flashed and machines rebooted # in about 10 minutes flat. # # Update did the trick. Boot up is at least an order of # magnitude faster. # vn63 and vn64 now up and running, hacked vnDistEtc # to "Plotkinize" nodes; i.e. 
to advise vis a vis # /etc/motd that Plotkin users have priority # Restoration of data # vn63: nee mariah # vn64: nee whitney drwxr-xr-x 10 root root 4096 Jul 12 22:07 /d/vnfe4/home/whitney/ drwxr-xr-x 9 root root 4096 Jun 23 23:23 /d/vnfe4/home/mariah/ !!ls -ltd /d/vnfe4/home/{*whitney*,*mariah*} drwxr-xr-x 10 root root 4096 Jul 12 22:07 /d/vnfe4/home/whitney/ drwxr-xr-x 9 root root 4096 Jun 23 23:23 /d/vnfe4/home/mariah/ /d/vnfe4/home/whitney: total 36 drwxr-xr-x 69 root root 8192 Jul 12 16:24 etc/ drwxr-xr-x 3 root root 4096 Jul 12 16:23 usr/ drwxr-x--- 24 root root 4096 Jul 12 15:33 root/ drwxr-xr-x 5 root root 4096 Mar 20 2005 backup/ drwxr-xr-x 7 root root 4096 Dec 17 2004 home/ drwxr-xr-x 18 root root 4096 Oct 8 2004 var/ drwxr-xr-x 4 root root 4096 Oct 8 2004 boot/ drwxr-xr-x 2 root root 4096 Apr 14 2004 misc/ /d/vnfe4/home/mariah: total 32 drwxr-xr-x 6 root root 4096 Jun 24 12:48 home/ drwxr-xr-x 3 root root 4096 Jun 23 21:23 usr/ drwxr-x--- 18 root root 4096 Jun 23 16:44 root/ drwxr-xr-x 70 root root 8192 Jun 23 16:28 etc/ drwxr-xr-x 3 root root 4096 Mar 15 2005 backup/ drwxr-xr-x 18 root root 4096 Oct 14 2004 var/ drwxr-xr-x 2 root root 4096 Apr 14 2004 misc/ # As root@{vn63,vn64} # TODO: Before we restore /home's need to rationalize user numbers # TODO: Actually, should talk to Steve about how this should be # done since a couple of machines could profitably be used # as front ends. # No ... go ahead and restore # {/home,/backup,/misc} #----------------------------------------------------------------------- # As root@vn63 cd / mv home home.O cd /d/vnfe4/home/mariah/ cp -a home backup misc / 32G home 17G backup 4.0k misc # As root@vn63 cd / foreach i (`iota 1000`) date du -hs home backup misc sleep 30 echo end # As root@head cd /d/vnfe4/home/mariah du -hs home backup misc #----------------------------------------------------------------------- # As root@vn64 # TODO: LOOP (!) 
cd / mv home home.O cd /d/vnfe4/home/whitney/ cp -a home backup misc / # As root@head cd /d/vnfe4/home/whitney du -hs home backup misc 62G home 39G backup 4.0k misc # As root@vn64 cd / foreach i (`iota 1000`) date du -hs home backup misc sleep 30 echo end # vn64(whitney) cp keeps crapping out, hope it's a bad driver, otherwise # it could be a bad NIC # Using e100, need to update # What are BIOS settings? Has loaded SMP kernel, but is that due to # hyperthreading? Probably, in which case leave it? # As root@{vn63,vn64} # vn64 down for the second time. Need to go check cable connection, # but more importantly, ensure that there are updated e100.o drivers # TODO: # # As root@{vn63,vn64} # Network actually came back cd /usr/src/linux view Makefile make # TODO OK? vn63 # TODO OK? vn64 # As root@{vn63,vn64} mkdir -p /root/install mkdir -p /d/bh0/home mount bh0:/home !$ ls -lt /d/bh0/home/matt/system/INTEL/Drivers/e100-3.4.14.tar.gz; setenv PACK e100-3.4.14; cd /root/install; /bin/rm -rf $PACK*; ls -ltd $PACK*; tar zxf /d/bh0/home/matt/system/INTEL/Drivers/e100-3.4.14.tar.gz; cd $PACK; pwd; ls setenv PACK e100-3.4.14; cd /root/install/$PACK/src; make; make install; pwd; ls -lt # OK?? updatedb slocate '^e100.ko' | tee /tmp/`hostname -s`-e100-ko slocate '^e100.o' | tee /tmp/`hostname -s`-e100-o slocate e100.ko /root/install/e100-3.4.14/src/e100.ko /root/install/e100-3.4.14/src/.e100.ko.cmd /usr/src/linux-2.6.12-12mdk/drivers/net/e100.ko /usr/src/linux-2.6.12-12mdk/drivers/net/.e100.ko.cmd /lib/modules/2.6.12-12mdksmp/kernel/drivers/net/e100/e100.ko diff /lib/modules/2.6.12-12mdksmp/kernel/drivers/net/e100/e100.ko /root/install/e100-3.4.14/src/e100.ko # OK! diff /lib/modules/2.6.12-12mdksmp/kernel/drivers/net/e100/e100.ko /usr/src/linux-2.6.12-12mdk/drivers/net/e100.ko Files /lib/modules/2.6.12-12mdksmp/kernel/drivers/net/e100/e100.ko and /usr/src/linux-2.6.12-12mdk/drivers/net/e100.ko differ # Right on, right on, right on! # As root@vn64 reboot # ... 
and hope for the best # Thu Oct 20 15:54:03 PDT 2005 # Thu Oct 20 15:54:48 PDT 2005 # ... so about a minute for boot up foreach i (`iota 1000`) date | tee -a /tmp/l ping -c 1 vn64 | tee -a /tmp/l sleep 5 --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.209/0.209/0.209/0.000 ms Thu Oct 20 15:53:48 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, 100% packet loss, time 0ms Thu Oct 20 15:54:03 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. From (142.103.237.63) icmp_seq=1 Destination Host Unreachable --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Thu Oct 20 15:54:11 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. From (142.103.237.63) icmp_seq=1 Destination Host Unreachable --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Thu Oct 20 15:54:19 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. From (142.103.237.63) icmp_seq=1 Destination Host Unreachable --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Thu Oct 20 15:54:27 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. From (142.103.237.63) icmp_seq=1 Destination Host Unreachable --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Thu Oct 20 15:54:35 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. 
From (142.103.237.63) icmp_seq=1 Destination Host Unreachable --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Thu Oct 20 15:54:43 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. 64 bytes from vn64.physics.ubc.ca (142.103.237.64): icmp_seq=1 ttl=64 time=1.96 ms --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 1.960/1.960/1.960/0.000 ms Thu Oct 20 15:54:48 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. 64 bytes from vn64.physics.ubc.ca (142.103.237.64): icmp_seq=1 ttl=64 time=0.191 ms --- vn64.physics.ubc.ca ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.191/0.191/0.191/0.000 ms Thu Oct 20 15:54:53 PDT 2005 PING vn64.physics.ubc.ca (142.103.237.64) 56(84) bytes of data. 64 bytes from vn64.physics.ubc.ca (142.103.237.64): icmp_seq=1 ttl=64 time=0.113 ms # ntpd needs to be installed, so need to export /mandrake # As root@boson cd /etc; /bin/rm exports.O; cp exports exports.O; vi exports; # urpmi config looks ok ssh root@vn64 'urpmi rwhod' # ... and vn64 has bitten it again. Looks like a bad NIC? 
# USERS # alena 506 -> # matthias 514 -> # steve 500 -> # reza 502 -> # yuri 510 -> # As choptuik@physics # As root@vnfe4 find / -type f -exec grep -il exercise {} \; | tee /tmp/exercise #----------------------------------------------------------------------- Fri Oct 21 10:10:49 PDT 2005 #----------------------------------------------------------------------- # Following machine room visit, and disabling of hyperthreading, have # rebuilt kernel (NON-smp), TODO: need to make default, and reboot, then # update e100.o, and try restoring whitney # As root@{vn63,vn64} view /etc/lilo.conf # Probably isn't quite right mkdir -p /lib/modules/2.6.12-12mdk; cd /usr/src/linux; make install # TODO: Should just go over to the machine room with installation disk # install with hyper disabled, will then presumably get the correct # kernel ############################################################ Sat Oct 22 14:16:40 PDT 2005 ############################################################ # vn42 has been down for several days. Sat Oct 22 14:13:06 PDT 2005 vn42 down 4+08:35 vn61 down 3+03:38 vn62 down 14+10:10 # See README.CRASH (CRASH_197: Software?) ############################################################ # Sat Oct 22 10:26:53 PDT 2005 # # Kernel Installation procedure ############################################################ # As matt@vnfe1 vr 64 # As root@vn64 cd /usr/src/linux; ls 3rdparty/ fs/ Makefile rsbac/ arch/ include/ Makefile.2005-10-21-0933-55-657336 scripts/ Archive/ init/ Makefile.O security/ COPYING ipc/ mm/ sound/ CREDITS kdb/ Module.symvers System.map crypto/ kernel/ net/ usr/ Documentation/ lib/ README vmlinux* drivers/ MAINTAINERS REPORTING-BUGS ################################################################################ ## Linux kernel release 2.6.xx ################################################# Linux kernel release 2.6.xx These are the release notes for Linux version 2.6. 
Read them carefully, as they tell you what this is all about, explain how to install the kernel, and what to do if something goes wrong. WHAT IS LINUX? Linux is a Unix clone written from scratch by Linus Torvalds with assistance from a loosely-knit team of hackers across the Net. It aims towards POSIX compliance. It has all the features you would expect in a modern fully-fledged Unix, including true multitasking, virtual memory, shared libraries, demand loading, shared copy-on-write executables, proper memory management and TCP/IP networking. It is distributed under the GNU General Public License - see the accompanying COPYING file for more details. ON WHAT HARDWARE DOES IT RUN? Linux was first developed for 386/486-based PCs. These days it also runs on ARMs, DEC Alphas, SUN Sparcs, M68000 machines (like Atari and Amiga), MIPS and PowerPC, and others. DOCUMENTATION: - There is a lot of documentation available both in electronic form on the Internet and in books, both Linux-specific and pertaining to general UNIX questions. I'd recommend looking into the documentation subdirectories on any Linux FTP site for the LDP (Linux Documentation Project) books. This README is not meant to be documentation on the system: there are much better sources available. - There are various README files in the Documentation/ subdirectory: these typically contain kernel-specific installation notes for some drivers for example. See Documentation/00-INDEX for a list of what is contained in each file. Please read the Changes file, as it contains information about the problems, which may result by upgrading your kernel. - The Documentation/DocBook/ subdirectory contains several guides for kernel developers and users. These guides can be rendered in a number of formats: PostScript (.ps), PDF, and HTML, among others. After installation, "make psdocs", "make pdfdocs", or "make htmldocs" will render the documentation in the requested format. 
INSTALLING the kernel: - If you install the full sources, put the kernel tarball in a directory where you have permissions (eg. your home directory) and unpack it: gzip -cd linux-2.6.XX.tar.gz | tar xvf - Replace "XX" with the version number of the latest kernel. Do NOT use the /usr/src/linux area! This area has a (usually incomplete) set of kernel headers that are used by the library header files. They should match the library, and not get messed up by whatever the kernel-du-jour happens to be. - You can also upgrade between 2.6.xx releases by patching. Patches are distributed in the traditional gzip and the new bzip2 format. To install by patching, get all the newer patch files, enter the top level directory of the kernel source (linux-2.6.xx) and execute: gzip -cd ../patch-2.6.xx.gz | patch -p1 or bzip2 -dc ../patch-2.6.xx.bz2 | patch -p1 (repeat xx for all versions bigger than the version of your current source tree, _in_order_) and you should be ok. You may want to remove the backup files (xxx~ or xxx.orig), and make sure that there are no failed patches (xxx# or xxx.rej). If there are, either you or me has made a mistake. Alternatively, the script patch-kernel can be used to automate this process. It determines the current kernel version and applies any patches found. linux/scripts/patch-kernel linux The first argument in the command above is the location of the kernel source. Patches are applied from the current directory, but an alternative directory can be specified as the second argument. - Make sure you have no stale .o files and dependencies lying around: cd linux make mrproper You should now have the sources correctly installed. SOFTWARE REQUIREMENTS Compiling and running the 2.6.xx kernels requires up-to-date versions of various software packages. Consult Documentation/Changes for the minimum version numbers required and how to get updates for these packages. 
Beware that using excessively old versions of these packages can cause indirect errors that are very difficult to track down, so don't assume that you can just update packages when obvious problems arise during build or operation. BUILD directory for the kernel: When compiling the kernel all output files will per default be stored together with the kernel source code. Using the option "make O=output/dir" allow you to specify an alternate place for the output files (including .config). Example: kernel source code: /usr/src/linux-2.6.N build directory: /home/name/build/kernel To configure and build the kernel use: cd /usr/src/linux-2.6.N make O=/home/name/build/kernel menuconfig make O=/home/name/build/kernel sudo make O=/home/name/build/kernel modules_install install Please note: If the 'O=output/dir' option is used then it must be used for all invocations of make. CONFIGURING the kernel: Do not skip this step even if you are only upgrading one minor version. New configuration options are added in each release, and odd problems will turn up if the configuration files are not set up as expected. If you want to carry your existing configuration to a new version with minimal work, use "make oldconfig", which will only ask you for the answers to new questions. - Alternate configuration commands are: "make menuconfig" Text based color menus, radiolists & dialogs. "make xconfig" X windows (Qt) based configuration tool. "make gconfig" X windows (Gtk) based configuration tool. "make oldconfig" Default all questions based on the contents of your existing ./.config file. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can under some circumstances lead to problems: probing for a nonexistent controller card may confuse your other controllers - compiling the kernel with "Processor type" set higher than 386 will result in a kernel that does NOT work on a 386. The kernel will detect this on bootup, and give up. 
- A kernel with math-emulation compiled in will still use the coprocessor if one is present: the math emulation will just never get used in that case. The kernel will be slightly larger, but will work on different machines regardless of whether they have a math coprocessor or not. - the "kernel hacking" configuration details usually result in a bigger or slower kernel (or both), and can even make the kernel less stable by configuring some routines to actively try to break bad code to find kernel problems (kmalloc()). Thus you should probably answer 'n' to the questions for "development", "experimental", or "debugging" features. - Check the top Makefile for further site-dependent configuration (default SVGA mode etc). COMPILING the kernel: - Make sure you have gcc 2.95.3 available. gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile some parts of the kernel, and are *no longer supported*. Also remember to upgrade your binutils package (for as/ld/nm and company) if necessary. For more information, refer to Documentation/Changes. Please note that you can still run a.out user programs with this kernel. - Do a "make" to create a compressed kernel image. It is also possible to do "make install" if you have lilo installed to suit the kernel makefiles, but you may want to check your particular lilo setup first. # Backup old kernel and modules # Get kernel release using uname KERNEL=`uname -r` /boot/vmlinuz make make modules_install lilo Added linux * Added linux-nonfb Added 2.6.12-12 Added failsafe cat /etc/lilo.conf default="linux" boot=/dev/sda map=/boot/map keytable=/boot/us.klt menu-scheme=wb:bw:wb:bw prompt nowarn timeout=100 message=/boot/message image=/boot/vmlinuz label="linux" root=/dev/sda5 initrd=/boot/initrd.img append="resume=/dev/sda1 splash=silent" vga=788 To do the actual install you have to be root, but none of the normal build should require that. Don't take the name of root in vain. 
- If you configured any of the parts of the kernel as `modules', you will also have to do "make modules_install". - Keep a backup kernel handy in case something goes wrong. This is especially true for the development releases, since each new release contains new code which has not been debugged. Make sure you keep a backup of the modules corresponding to that kernel, as well. If you are installing a new kernel with the same version number as your working kernel, make a backup of your modules directory before you do a "make modules_install". - In order to boot your new kernel, you'll need to copy the kernel image (e.g. .../linux/arch/i386/boot/bzImage after compilation) to the place where your regular bootable kernel is found. - Booting a kernel directly from a floppy without the assistance of a bootloader such as LILO, is no longer supported. If you boot Linux from the hard drive, chances are you use LILO which uses the kernel image as specified in the file /etc/lilo.conf. The kernel image file is usually /vmlinuz, /boot/vmlinuz, /bzImage or /boot/bzImage. To use the new kernel, save a copy of the old image and copy the new image over the old one. Then, you MUST RERUN LILO to update the loading map!! If you don't, you won't be able to boot the new kernel image. Reinstalling LILO is usually a matter of running /sbin/lilo. You may wish to edit /etc/lilo.conf to specify an entry for your old kernel image (say, /vmlinux.old) in case the new one does not work. See the LILO docs for more information. After reinstalling LILO, you should be all set. Shutdown the system, reboot, and enjoy! If you ever need to change the default root device, video mode, ramdisk size, etc. in the kernel image, use the 'rdev' program (or alternatively the LILO boot options when appropriate). No need to recompile the kernel to change these parameters. - Reboot with the new kernel and enjoy. 
IF SOMETHING GOES WRONG: - If you have problems that seem to be due to kernel bugs, please check the file MAINTAINERS to see if there is a particular person associated with the part of the kernel that you are having trouble with. If there isn't anyone listed there, then the second best thing is to mail them to me (torvalds@osdl.org), and possibly to any other relevant mailing-list or to the newsgroup. - In all bug-reports, *please* tell what kernel you are talking about, how to duplicate the problem, and what your setup is (use your common sense). If the problem is new, tell me so, and if the problem is old, please try to tell me when you first noticed it. - If the bug results in a message like unable to handle kernel paging request at address C0000010 Oops: 0002 EIP: 0010:XXXXXXXX eax: xxxxxxxx ebx: xxxxxxxx ecx: xxxxxxxx edx: xxxxxxxx esi: xxxxxxxx edi: xxxxxxxx ebp: xxxxxxxx ds: xxxx es: xxxx fs: xxxx gs: xxxx Pid: xx, process nr: xx xx xx xx xx xx xx xx xx xx xx or similar kernel debugging information on your screen or in your system log, please duplicate it *exactly*. The dump may look incomprehensible to you, but it does contain information that may help debugging the problem. The text above the dump is also important: it tells something about why the kernel dumped code (in the above example it's due to a bad kernel pointer). More information on making sense of the dump is in Documentation/oops-tracing.txt - If you compiled the kernel with CONFIG_KALLSYMS you can send the dump as is, otherwise you will have to use the "ksymoops" program to make sense of the dump. This utility can be downloaded from ftp://ftp.<country>.kernel.org/pub/linux/utils/kernel/ksymoops. Alternately you can do the dump lookup by hand: - In debugging dumps like the above, it helps enormously if you can look up what the EIP value means. The hex value as such doesn't help me or anybody else very much: it will depend on your particular kernel setup.
What you should do is take the hex value from the EIP line (ignore the "0010:"), and look it up in the kernel namelist to see which kernel function contains the offending address. To find out the kernel function name, you'll need to find the system binary associated with the kernel that exhibited the symptom. This is the file 'linux/vmlinux'. To extract the namelist and match it against the EIP from the kernel crash, do: nm vmlinux | sort | less This will give you a list of kernel addresses sorted in ascending order, from which it is simple to find the function that contains the offending address. Note that the address given by the kernel debugging messages will not necessarily match exactly with the function addresses (in fact, that is very unlikely), so you can't just 'grep' the list: the list will, however, give you the starting point of each kernel function, so by looking for the function that has a starting address lower than the one you are searching for but is followed by a function with a higher address you will find the one you want. In fact, it may be a good idea to include a bit of "context" in your problem report, giving a few lines around the interesting one. If you for some reason cannot do the above (you have a pre-compiled kernel image or similar), telling me as much about your setup as possible will help. - Alternately, you can use gdb on a running kernel. (read-only; i.e. you cannot change values or set break points.) To do this, first compile the kernel with -g; edit arch/i386/Makefile appropriately, then do a "make clean". You'll also need to enable CONFIG_PROC_FS (via "make config"). After you've rebooted with the new kernel, do "gdb vmlinux /proc/kcore". You can now use all the usual gdb commands. The command to look up the point where your system crashed is "l *0xXXXXXXXX". (Replace the XXXes with the EIP value.) gdb'ing a non-running kernel currently fails because gdb (wrongly) disregards the starting offset for which the kernel is compiled. 
vnKernelInstall -c | tee /tmp/vnKernelInstall # OK vnKernelInstall | tee /tmp/vnKernelInstall # OK (except that 'make clean' wasn't behind an Exec!) # As root@vn63 vnKernelInstall | tee /tmp/vnKernelInstall make xconfig # Load .config and disable smp support # OK cd /usr/src/linux scp .config root@vn64:`pwd` ssh root@vn63 grep SMP /usr/src/linux/.config ssh root@vn64 grep SMP /usr/src/linux/.config Sat Oct 22 14:07:42 PDT 2005 # As root@vn64 vnKernelInstall -x # As root@vn63 vnKernelInstall -x # ... and away they go! Remains to be seen how the install works! # ... OK, living insanely stupidly/dangerously, said 'y' to the reboot # whilst drakconf/urpmi was still going Kernel panic - not syncing: No init found. Try passing init= option to kernel. # Booted off of CD, updated, OK # # urpmi configuration # As root@{vn63,vn64} # As root@{vn63,vn64} drakconf 2006-mandrake /mandrake/2006/i586/media/main media_info/hdlist.cz 2006-update /mandrake/2006/updates/main_updates media_info/hdlist.cz 2006-contrib /mandrake/2006/i586/media/contrib media_info/hdlist.cz # TODO: TODO: IMPORTANT!! GOOD INDICATIONS THAT SHOULDN'T USE MORE THAN # ONE drakconf on (e.g.) /mandrake/ AT ANY GIVEN TIME # As root@{vn63,vn64} vr 63 urpmi rwho # OK # TODO: vr 64 urpmi rwho # TODO: Disable services on vn63, vn64 # As root@vn63 cd /root/install cp s svc # Hack svc svc # As matt@vnfe1 via a cg 'chkconfig --list | grep \!*' chkconfig rwhod on; service rwhod restart cg rwho cg ftp cg http cg smb cg uid # create rwhod user # As matt@vnfe1 etc get passwd vi passwd rwhod:x:9999:9999:rwhod user:/:/bin/false vnDistEtc passwd ssh root@vn63 'chown -R rwhod /var/spool/rwho; service rwhod restart' ssh root@vn64 'chown -R rwhod /var/spool/rwho; service rwhod restart' ssh root@vn63 chkconfig httpd off ssh root@vn64 chkconfig httpd off # OK, supposed to be at Rob's by now!!
mkdir -p /root/install mkdir -p /d/bh0/home mount bh0:/home !$ ls -lt /d/bh0/home/matt/system/INTEL/Drivers/e100-3.4.14.tar.gz; setenv PACK e100-3.4.14; cd /root/install; /bin/rm -rf $PACK*; ls -ltd $PACK*; tar zxf /d/bh0/home/matt/system/INTEL/Drivers/e100-3.4.14.tar.gz; cd $PACK; pwd; ls setenv PACK e100-3.4.14; cd /root/install/$PACK/src; make; make install; pwd; ls -lt ssh vn63 reboot ssh vn64 rebot # Didn't come back!! #--------------------------------------------------------------------- Mon Oct 24 06:26:01 PDT 2005 #--------------------------------------------------------------------- # Both machines back with NEW module COMPILED but OLD module installed cd /root/install.../ make install # This is the crucial step that possibly the intel script does NOT # do? modprobe e100 reboot # OK # As root@vn64 /bin/rm -r /home cp -a /d/vnfe4/home/whitney /home # As root@vn64 cd /home/whitney du -hs 39G backup 3.7M boot 60M etc 62G home 4.0K misc 484M root 28M usr 63M var # As root@vnfe4 cd /home/whitney du -hs 39G backup 3.7M boot 60M etc 62G home 4.0k misc 484M root 28M usr 63M var # OK, looks good, do same with mariah and /home # As root@vn63 vr 63 /bin/rm -rf /home mkdir -p /home cp -a /d/vnfe4/home/mariah /home # As root@vn63 cd /home/mariah foreach i (`iota 1000`) date du -hs * echo sleep 30 end # As root@vnfe4 cd /home/mariah du -hs * 17G backup 58M etc 32G home 4.0k misc 3.8M root 6.2M usr 64M var Mon Oct 24 06:44:32 PDT 2005 # Head home for breakfast # alena 506 -> 750 # matthias 514 -> 466 # steve 500 -> 412 # reza 502 -> 601 # yuri 510 -> 568 # As choptuik@physics.ubc.ca sudo pwentry alena alena:x:750:307:Alena Shmygelska:/home2/alena:/bin/tcsh alena:PIyZ1vvgeb6M.:12755:::::: matthias matthias:x:466:307:Matthias Huber:/home/matthias:/bin/tcsh matthias:Ei9RTd2EhpAkU:12564:::::: steve steve:x:412:307:Steven Plotkin:/home/steve:/bin/bash steve:yEmUKu1Xevoos:11736::::::-1 reza -> ejtehadi Login name: ejtehadi In real life: Reza Ejtehadi Directory: /home/ejtehadi 
Shell: /bin/tcsh Last login Tue Nov 30, 2004 on pts/16 from 194.225.71.91 No unread mail No Plan. ejtehadi:x:601:307:Reza Ejtehadi:/home/ejtehadi:/bin/tcsh ejtehadi:**expired050930**0duDwcBD0G83A:11970:::::: yuri -> gusev Login name: gusev In real life: Yuri gusev Directory: /home/gusev Shell: /bin/tcsh Last login Mon Oct 24 08:46 on pts/63 from d66-183-135-169 No unread mail No Plan. gusev:x:568:307:Yuri gusev:/home/gusev:/bin/tcsh gusev:xKuddEgnb0Kes:12795:::::: # As root@{vn63,vn64} drakconf 2006-mandrake /mandrake/2006/i586/media/main media_info/hdlist.cz 2006-update /mandrake/2006/updates/main_updates media_info/hdlist.cz 2006-contrib /mandrake/2006/i586/media/contrib media_info/hdlist.cz # Generically getting this error Unable to add medium, errors reported: copy of [/mandrake/2006/i586/media/contrib/media_info/hdlist.cz] failed (md5sum mismatch) ssh root@vnfe4 vics setenv NEWU {alena,matthias,steve,reza,yuri} soc ls -ltd /home/$NEWU/ ls: /home/alena/: No such file or directory ls: /home/reza/: No such file or directory ls: /home/yuri/: No such file or directory drwxr-xr-x 7 matthias other 4096 Oct 12 2004 /home/matthias// drwx------ 2 steve choptuik 4096 May 14 2003 /home/steve// # Create accounts for alena reza yuri # As matt@vnfe1 nu # On /mariah, /whitney # alena 506 -> 750 # matthias 514 -> 466 # steve 500 -> 412 # reza 502 -> 601 # yuri 510 -> 568 # On /d/vnfe4 # steve 9054 -> 412 #=========================================================== # Account creation #=========================================================== # As matt@vnfe1 grep -i plotkin /etc/passwd plotkin:x:9054:9000:Steven Plotkin:/d/vnfe1/home/plotkin:/bin/bash grep -i matthias /etc/passwd matthias:!:466:9000:Matthias Huber:/d/vnfe1/home2/matthias:/bin/bash cp choptuik_group plotkin_group alena:x:750:307:Alena Shmygelska:/home2/alena:/bin/tcsh alena:PIyZ1vvgeb6M.:12755:::::: ejtehadi:x:601:307:Reza Ejtehadi:/home/ejtehadi:/bin/tcsh ejtehadi:**expired050930**0duDwcBD0G83A:11970:::::: 
gusev:x:568:307:Yuri gusev:/home/gusev:/bin/tcsh gusev:xKuddEgnb0Kes:12795:::::: steve:x:412:307:Steven Plotkin:/home/steve:/bin/bash steve:yEmUKu1Xevoos:11736::::::-1 matthias:x:466:307:Matthias Huber:/home/matthias:/bin/tcsh matthias:Ei9RTd2EhpAkU:12564:::::: vi plotkin_group gusev:x:568:9100:Yuri Gusev:/d/vnfe1/home/gusev:/bin/tcsh alena:x:750:9100:Alena Shmygelska:/d/vnfe1/home/alena:/bin/tcsh vnNewUsers plotkin_group # TODO: Need to mirror on C2 ssh gusev@vnfe1 ssh alena@vnfe1 # OK, and password changed etc sola get passwd vnDistEtc passwd # As root@vn64 foreach p (/whitney /mariah) cd $p find . -uid 506 -exec chown -R alena.plotkin {} \; find . -uid 514 -exec chown -R matthias.plotkin {} \; find . -uid 500 -exec chown -R steve.plotkin {} \; find . -uid 502 -exec chown -R ejtehadi.plotkin {} \; find . -uid 510 -exec chown -R gusev.plotkin {} \; end cd /d/vnfe1/home; chown -R steve.plotkin steve cd /d/vnfe4/home; chown -R steve.plotkin steve # Still had 9100 -> halpern, redo foreach p (/whitney /mariah) cd $p find . -uid 750 -exec chown -R alena.plotkin {} \; find . -uid 466 -exec chown -R matthias.plotkin {} \; find . -uid 611 -exec chown -R steve.plotkin {} \; find . -uid 601 -exec chown -R ejtehadi.plotkin {} \; find . -uid 568 -exec chown -R gusev.plotkin {} \; end cd /d/vnfe1/home; chown -R steve.plotkin steve cd /d/vnfe4/home; chown -R steve.plotkin steve # Oops, Steve can't be steve since Steve L is, will need to make # plotkin his second choice foreach p (/whitney /mariah) cd $p find . 
-uid 611 -exec chown -R plotkin.plotkin {} \; end cd /d/vnfe1/home; chown -R steve.choptuik steve cd /d/vnfe4/home; chown -R steve.choptuik steve chown -R plotkin.plotkin /d/{vnfe1,vnfe4}/home/plotkin chown -R steve.choptuik /d/{vnfe1,vnfe4}/home/steve # As root@vn63 vr 63 cd /etc Arc exports vi exports /mariah vn*.physics.ubc.ca(rw,async,no_root_squash) bh*.physics.ubc.ca(rw,async,no_root_squash) ln -s /home/mariah /mariah exportfs -av mkdir /whitney mount vn64:/whitney /whitney # As root@vn64 vr 64 cd /etc Arc exports vi exports /whitney vn*.physics.ubc.ca(rw,async,no_root_squash) bh*.physics.ubc.ca(rw,async,no_root_squash) ln -s /home/whitney /whitney exportfs -av mkdir /mariah mount vn63:/mariah /mariah # OK fstab entries for all nodes ################################################################################ Mon Oct 24 14:04:51 PDT 2005 ################################################################################ # Fix up fstab so that all nodes mount /mariah, /whitney. # vn63 (mariah): # vn64 (whitney): # # Special cases to be dealt with in vnDistEtc # As matt@vnfe1 viw vnDistEtc etc ls *fstab* fstab fstab.node fstab.node.novnfe2 fstab.vn64 fstab.cooperon fstab.node.2004.12.16 fstab.node.real fstab.vnfe1 fstab_frag fstab.node.new fstab.vn1 fstab.vnfe3 Arc fstab.node scp root@vn1:/etc/fstab fstab.node vi fstab.node vn63:/mariah /mariah nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vn64:/whitney /whitney nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 foreach h (vn63 vn64 vnfe1 vnfe3) scp root@${h}:/etc/fstab fstab.${h} ls -ltd fstab.${h} sleep 1 end vi fstab.node fstab.vn63 fstab.vn64 fstab.vnfe1 fstab.vnfe3 # TODO: vn61 vnallbgCommand 'mkdir -p /whitney /mariah' vnDistEtc fstab vnallCommand cat /etc/fstab | tee /tmp/vn-fstab vnallCommand 'mount -a' vnallCommand 'df' | tee /tmp/vn-df # As root@vnfe1 chown -R plotkin.plotkin /home2/plotkin ################################################################################ Mon Oct 24 17:13:20 PDT 2005 
################################################################################ # Cleaning up etc master # As matt@vnfe1 etc mkdir Motd mv motd.200[0-9]* Motd cd Motd cp motd.2005.10.24 .. cd .. cp motd.2005.10.24 motd.2005.10.24a vi motd.2005.10.24a cp motd.2005.10.24a motd viw vnN vnDistEtc motd passwd shadow group hosts.allow hosts.deny # OK vnallCommand 'ntptimeset; cd /whitney; pwd; ls; cd /mariah; pwd; ls' | tee /tmp/vnall ################################################################################ Wed Oct 26 08:15:28 PDT 2005 ################################################################################ # Need to have Mary Ann expand DNS entries for vn65-vn75 ################################################################################ Thu Oct 27 07:45:11 PDT 2005 ################################################################################ # Have essentially completed insertion of "elvis" (Steve P) # TODO: vnfe4:/home mount # SEE README.elvis # TODO: Compilers, MPI etc. 
(fix up that state vis a vis Maggie's complaint # from months ago) # 64-proc mpi run vnrun -n 64 cpi 2>&1|sort | tee /tmp/vnrun-65 3.763u 4.520s 0:27.63 29.9% 0+0k 0+0io 0pf+0w pi is approximately 3.1415926539002341, Error is 0.0000000003104410 Process 10 of 64 on vn35.physics.ubc.ca: n_intervals=-1 Process 11 of 64 on vn36.physics.ubc.ca: n_intervals=-1 Process 12 of 64 on vn3.physics.ubc.ca: n_intervals=-1 Process 13 of 64 on vn41.physics.ubc.ca: n_intervals=-1 Process 14 of 64 on vn42.physics.ubc.ca: n_intervals=-1 Process 15 of 64 on vn43.physics.ubc.ca: n_intervals=-1 Process 16 of 64 on vn44.physics.ubc.ca: n_intervals=-1 Process 17 of 64 on vn45.physics.ubc.ca: n_intervals=-1 Process 18 of 64 on vn46.physics.ubc.ca: n_intervals=-1 Process 19 of 64 on vn47.physics.ubc.ca: n_intervals=-1 Process 1 of 64 on vn21.physics.ubc.ca: n_intervals=16384 Process 20 of 64 on vn48.physics.ubc.ca: n_intervals=-1 Process 21 of 64 on vn49.physics.ubc.ca: n_intervals=-1 Process 22 of 64 on vn50.physics.ubc.ca: n_intervals=-1 Process 23 of 64 on vn51.physics.ubc.ca: n_intervals=-1 Process 24 of 64 on vn53.physics.ubc.ca: n_intervals=-1 Process 25 of 64 on vn54.physics.ubc.ca: n_intervals=-1 Process 26 of 64 on vn55.physics.ubc.ca: n_intervals=-1 Process 27 of 64 on vn57.physics.ubc.ca: n_intervals=-1 Process 28 of 64 on vn58.physics.ubc.ca: n_intervals=-1 Process 29 of 64 on vn59.physics.ubc.ca: n_intervals=-1 Process 2 of 64 on vn22.physics.ubc.ca: n_intervals=-1 Process 30 of 64 on vn5.physics.ubc.ca: n_intervals=-1 Process 31 of 64 on vn60.physics.ubc.ca: n_intervals=-1 Process 32 of 64 on vn61.physics.ubc.ca: n_intervals=-1 Process 33 of 64 on vn6.physics.ubc.ca: n_intervals=-1 Process 34 of 64 on vn7.physics.ubc.ca: n_intervals=-1 Process 35 of 64 on vn9.physics.ubc.ca: n_intervals=-1 Process 36 of 64 on vn20.physics.ubc.ca: n_intervals=-1 Process 37 of 64 on vn23.physics.ubc.ca: n_intervals=-1 Process 38 of 64 on vn27.physics.ubc.ca: n_intervals=-1 Process 39 of 64 on 
vn29.physics.ubc.ca: n_intervals=-1 Process 3 of 64 on vn24.physics.ubc.ca: n_intervals=-1 Process 40 of 64 on vn2.physics.ubc.ca: n_intervals=-1 Process 41 of 64 on vn31.physics.ubc.ca: n_intervals=-1 Process 42 of 64 on vn52.physics.ubc.ca: n_intervals=-1 Process 43 of 64 on vn8.physics.ubc.ca: n_intervals=-1 Process 44 of 64 on vn33.physics.ubc.ca: n_intervals=-1 Process 45 of 64 on vn38.physics.ubc.ca: n_intervals=-1 Process 46 of 64 on vn39.physics.ubc.ca: n_intervals=-1 Process 47 of 64 on vn40.physics.ubc.ca: n_intervals=-1 Process 48 of 64 on vn56.physics.ubc.ca: n_intervals=-1 Process 49 of 64 on vn37.physics.ubc.ca: n_intervals=-1 Process 4 of 64 on vn25.physics.ubc.ca: n_intervals=-1 Process 50 of 64 on vn4.physics.ubc.ca: n_intervals=-1 Process 51 of 64 on vn22.physics.ubc.ca: n_intervals=-1 Process 52 of 64 on vn24.physics.ubc.ca: n_intervals=-1 Process 53 of 64 on vn25.physics.ubc.ca: n_intervals=-1 Process 54 of 64 on vn26.physics.ubc.ca: n_intervals=-1 Process 55 of 64 on vn28.physics.ubc.ca: n_intervals=-1 Process 56 of 64 on vn30.physics.ubc.ca: n_intervals=-1 Process 57 of 64 on vn32.physics.ubc.ca: n_intervals=-1 Process 58 of 64 on vn34.physics.ubc.ca: n_intervals=-1 Process 59 of 64 on vn35.physics.ubc.ca: n_intervals=-1 Process 5 of 64 on vn26.physics.ubc.ca: n_intervals=-1 Process 60 of 64 on vn36.physics.ubc.ca: n_intervals=-1 Process 61 of 64 on vn3.physics.ubc.ca: n_intervals=-1 Process 62 of 64 on vn41.physics.ubc.ca: n_intervals=-1 Process 63 of 64 on vn42.physics.ubc.ca: n_intervals=-1 Process 64 of 64 on vn43.physics.ubc.ca: n_intervals=-1 Process 6 of 64 on vn28.physics.ubc.ca: n_intervals=-1 Process 7 of 64 on vn30.physics.ubc.ca: n_intervals=-1 Process 8 of 64 on vn32.physics.ubc.ca: n_intervals=-1 Process 9 of 64 on vn34.physics.ubc.ca: n_intervals=-1 vn2 vn20 vn22 vn23 vn24 vn25 vn26 vn27 vn28 vn29 vn3 vn30 vn31 vn32 vn33 vn34 vn35 vn36 vn37 vn38 vn39 vn4 vn40 vn41 vn42 vn43 vn44 vn45 vn46 vn47 vn48 vn49 vn5 vn50 vn51 vn52 vn53 
vn54 vn55 vn56 vn57 vn58 vn59 vn6 vn60 vn61 vn7 vn8 vn9 wall clock time = 0.009690 Will execute 'ssh -x vn21 cd /d/vnfe1/home/matt/examples/cpi-mpi-intel; time mpirun -np 64 -machinefile mfile cpi ' Will use the following machine file ################################################################################ Fri Oct 28 07:53:38 PDT 2005 ################################################################################ # Need rwhod on vn65, and need to add new vn entries to # boson:/etc/{hosts.allow,exports} # As root@boson cd /etc cp hosts.allow hosts.alllow-2005-10-28 vi hosts.allow # Added portmap access for 142.103.237.65 through 142.103.237.75 # inclusive (vn65.physics.ubc.ca through vn75.physics.ubc.ca) service xinetd restart # exports should be OK # As root@vn65 mount -a # OK, /mandrake mounted drakconf # Updated urpmi --auto --auto-select --force rwhod # ... failed, since it should be 'rwho', but nonetheless results in # package installation (presumably due to the force), including chkconfig --list | grep squid squid 0:off 1:off 2:off 3:off 4:off 5:off 6:off urpmi --auto --auto-select --force rwho chkconfig rwhod on; service rwhod start chown -R rwhod /var/spool/rwho service rwhod restart ruptime # OK ############################################################ Wed Nov 2 10:37:50 PST 2005 ############################################################ # vn16 was incommunicado, in machine room on KVM:2, can log # SEE README.CRASH [CRASH_198] ############################################################ # New account for Adrian Cortes # # UBC ZOOL UG (Pineda) # # cortes ############################################################ cd vn Arc README.USERS vi README.USERS 9080 cortes # Adrian Cortes (UBC ZOOL UG Pineda) nu setenv U cortes cat<<END>${U} ${U}:x:9080:9000:Adrian Cortes:/d/vnfe1/home/${U}:/bin/bash END vnNewUsers ${U} # As root@vnfe1 setenv U cortes setenv G other cp ~phys410/.profile ~phys410/.aliases.bash ~${U}; chown ${U}.${G} 
~${U}/{.profile,.aliases.bash} ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # TODO: # As matt@vnfe1 etc sola; vs # $1$AcGcfYCV$5UBWrUXMF93jz7m7mLw7o. vnDistEtc shadow # As root@vnfe1 setenv U cortes setenv G other cd ~${U} cat<<END>.forward cortes@interchange.ubc.ca pineda@zoology.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} vnCommand finger ${U} # OK # Duplicate on C2 # As matt@vnfe1 nu cd Blurbs cp shyr cortes vi !$ scp cortes matt@bh0:/tmp # As matt@bh0- pine vnallCommand grep '^cortes' /etc/shadow ############################################################ Sun Nov 27 06:40:30 PST 2005 ############################################################ # New accounts for Alena Shmygelska (UBC PHAS PDF Plotkin) Shirin Hadizadeh (UBC PHAS GS Plotkin) # As choptuik@physics.ubc.ca finger alena Login name: alena In real life: Alena Shmygelska Directory: /home2/alena Shell: /bin/tcsh Never logged in. No unread mail No Plan. finger shirin Login name: shirin In real life: Shirin Hadizadeh Directory: /home2/shirin Shell: /bin/tcsh Last login Mon Oct 24 21:38 on pts/76 from s0106000f3d64da No unread mail No Plan. 
sudo pwentry alena alena:x:750:307:Alena Shmygelska:/home2/alena:/bin/tcsh alena:PIyZ1vvgeb6M.:12755:::::: shirin shirin:x:646:307:Shirin Hadizadeh:/home2/shirin:/bin/tcsh shirin:dC8YD/8biKj86:13032:::::: # As root@vnfe1 grep 750 /etc/passwd alena:x:750:9100:Alena Shmygelska:/d/vnfe1/home/alena:/bin/tcsh # OK, verify/update shadow entry and send message # indicating that account already exists # As matt@vnfe1 etc sola get shadow vs grep 646 /etc/passwd cd vn Arc README.USERS vi README.USERS 646 shirin # Shirin Hadizadeh (UBC PHAS GS Plotkin) nu setenv U shirin cat<${U} ${U}:x:646:9200:Shirin Hadizadeh:/d/vnfe1/home/${U}:/bin/tcsh END vnNewUsers ${U} # As root@vnfe1 setenv U shirin setenv G plotkin cp ~phys410/.cshrc ~phys410/.aliases ~${U}; chown ${U}.${G} ~${U}/{.cshrc,.aliases} ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # As matt@vnfe1 etc sola; vs # dC8YD/8biKj86 vnDistEtc shadow # As root@vnfe1 setenv U alena setenv G plotkin cd ~${U} cat<.forward alena@phas.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} vnCommand finger ${U} setenv U shirin setenv G plotkin cd ~${U} cat<.forward shirin@phas.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} vnCommand finger ${U} # OK # TODO: Duplicate on C2 # As matt@vnfe1 nu cd Blurbs cp cortes alena-shirin vi !$ scp alena-shirin matt@bh0:/tmp # As matt@bh0- pine vnallCommand grep '^alena' /etc/shadow vnallCommand grep '^shirin' /etc/shadow # OK ############################################################ Wed Nov 30 14:05:51 PST 2005 ############################################################ # Alena had forgotten physics password, has been reset now # needs to be propagated # As choptuik@physics sudo pwentry alena:x:750:307:Alena Shmygelska:/home2/alena:/bin/tcsh alena:pjYoZ9UDgME/Q:13117:::::: # XXX: pjYoZ9UDgME/Q # As matt@vnfe1 etc sola get shadow vs pjYoZ9UDgME/Q vnDistEtc shadow # As matt@bh0 etcc2 sola get shadow vs pjYoZ9UDgME/Q c2DistEtc shadow ############################################################ Sun 
Dec 4 14:02:23 PST 2005 ############################################################ New account for Sandip Paul UBC CHEM PDF Patey vi README.USERS 1256 sandip # Sandip Paul nu cat<<END>sandip sandip:x:1256:1200:Sandip Paul:/d/vnfe3/home/sandip:/bin/bash END vnNewUsers sandip ssh sandip@vnfe1 date # OK ssh sandip@vn35 date # OK # TODO etc sola; vs # $1$ZhCSJ5uM$XPDMFfZ.M3RL2wOMLHfPe1 vnDistEtc shadow ssh root@vnfe1 'cd ~sandip; echo spaul@chem.ubc.ca > .forward; chown sandip.patey .forward; ls -al; cat .forward' # TODO: DUPLICATE ACCOUNT ON vnfe4 # As matt@vnfe1 nu cd Blurbs cp chemming sandip vi sandip Rcp sandip # TODO: Send message, with CC to Patey ############################################################ Sun Jan 22 07:19:00 PST 2006 ############################################################ # Continuing incorporation of Steve Plotkin's machines # # eminem.physics.ubc.ca --> vn66.physics.ubc.ca # Temporarily configured as # IP: 142.103.234.190 # HN: bhtest1.physics.ubc.ca # GW: 142.103.234.254 # NM: 255.255.255.0 # D1: 137.82.1.1 # D2: 142.103.236.1 # xtina.physics.ubc.ca --> vn67.physics.ubc.ca # Temporarily configured as # IP: 142.103.234.191 # HN: bhtest2.physics.ubc.ca # GW: 142.103.234.254 # NM: 255.255.255.0 # D1: 137.82.1.1 # D2: 142.103.236.1 # As matt@vnfe1 cds Arc secondary-vn # Looks ok, but first configure urpmi to use Bill's distro on # both # Now have 'help' for 'urpmi' on bh0 # As matt@bh0 help urpmi # As root@bhtest1 mkdir -p /mandrake mount boson:/mandrake /mandrake drakconf 2006-mandrake /mandrake/2006/i586/media/main media_info/hdlist.cz 2006-update /mandrake/2006/updates/main_updates media_info/hdlist.cz 2006-contrib /mandrake/2006/i586/media/contrib media_info/hdlist.cz # As root@bhtest1 shutdown -h now # Take off ethernet, bring up and reconfigure as # eminem.physics.ubc.ca --> vn66.physics.ubc.ca # Configure as # IP: 142.103.237.66 # HN: vn66.physics.ubc.ca # GW: 142.103.237.254 # NM: 255.255.255.0 # D1: 137.82.1.1 # D2: 137.82.28.3 # D3: 
142.103.236.1 # As root@bhtest2 mkdir -p /mandrake mount boson:/mandrake /mandrake drakconf 2006-mandrake /mandrake/2006/i586/media/main media_info/hdlist.cz 2006-update /mandrake/2006/updates/main_updates media_info/hdlist.cz 2006-contrib /mandrake/2006/i586/media/contrib media_info/hdlist.cz # AS root@bhtest1 shutdown -h now # TAke off ethernet, bring up and reconfigure as # xtina.physics.ubc.ca --> vn67.physics.ubc.ca # Configure as # IP: 142.103.237.67 # HN: vn67.physics.ubc.ca # GW: 142.103.237.254 # NM: 255.255.255.0 # D1: 137.82.1.1 # D2: 137.82.28.3 # D3: 142.103.236.1 Wed Feb 22 00:10:24 PST 2006 # As matt@vnfe1 ssh root@vn66 'hostname -s; df; cat /proc/cpuinfo' | tee -a /tmp/vn ssh root@vn67 'hostname -s; df; cat /proc/cpuinfo' | tee -a /tmp/vn #----------------------------------------------------------------------- vn66 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 191264348 2998388 178550252 2% / vnfe4:/home 1014438800 831826488 131081816 87% /d/vnfe4/home boson:/mandrake 115377640 107515144 2001584 99% /mandrake processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Xeon(TM) CPU 2.40GHz stepping : 7 cpu MHz : 2392.851 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr bogomips : 4734.97 processor : 1 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Xeon(TM) CPU 2.40GHz stepping : 7 cpu MHz : 2392.851 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr bogomips : 4767.74 #----------------------------------------------------------------------- vn67 Filesystem 
1K-blocks Used Available Use% Mounted on /dev/hda5 114349864 58220736 50320428 54% / vnfe4:/home 1014438800 831826488 131081816 87% /d/vnfe4/home boson:/mandrake 115377640 107515144 2001584 99% /mandrake processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Xeon(TM) CPU 2.80GHz stepping : 7 cpu MHz : 2790.703 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr bogomips : 5521.40 processor : 1 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Xeon(TM) CPU 2.80GHz stepping : 7 cpu MHz : 2790.703 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe cid xtpr bogomips : 5570.56 # As root@{vn66,vn67} cat</etc/motd ############################################################################# # PLEASE DO NOT USE THIS NODE UNTIL FURTHER NOTICE!!! # ############################################################################# END # TODO: Use urpmi-sync to synchronize with vn65 # As root@vn66 mkdir -p /root/install cd /root/install scp -q root@vn65.physics.ubc.ca:/d/vnfe1/home/matt/scripts/urpmi-sync . ./urpmi-sync -vx 142.103.237.65 142.103.237.66 # OK ... and verify ... ./urpmi-sync -v 142.103.237.65 142.103.237.66 | tee /tmp/vn66-vn65 urpmi-sync: 1662 1662 20700 rpma-142.103.237.65 1672 1672 20847 rpma-142.103.237.66 2 2 26 142.103.237.65-not-142.103.237.66 11 11 162 142.103.237.66-not-142.103.237.65 3347 3347 41735 total urpmi-sync: ++++++++++++++++++++++++++++++++++++++++++++++++++++ urpmi-sync: Script execution paused. 
Enter anything to continue urpmi-sync: ++++++++++++++++++++++++++++++++++++++++++++++++++++ # Run secondary script mkdir -p /root/install cd /root/install scp matt@vnfe1:~/scripts/secondary-vn-2006 s ./s -x vn66 # As root@vn67 mkdir -p /root/install cd /root/install scp -q root@vn65.physics.ubc.ca:/d/vnfe1/home/matt/scripts/urpmi-sync . ./urpmi-sync -vx 142.103.237.65 142.103.237.67 # OK ... and verify ... ./urpmi-sync -v 142.103.237.65 142.103.237.67 | tee /tmp/vn67-vn65 urpmi-sync: 1662 1662 20700 rpma-142.103.237.65 1683 1683 20955 rpma-142.103.237.67 2 2 26 142.103.237.65-not-142.103.237.67 22 22 270 142.103.237.67-not-142.103.237.65 3369 3369 41951 total urpmi-sync: ++++++++++++++++++++++++++++++++++++++++++++++++++++ urpmi-sync: Script execution paused. Enter anything to continue urpmi-sync: ++++++++++++++++++++++++++++++++++++++++++++++++++++ # Run secondary script mkdir -p /root/install cd /root/install scp matt@vnfe1:~/scripts/secondary-vn-2006 s ./s -x vn67 # Need to uninstall newer kernel source rpm -e kernel-source-2.6-2.6.12-17mdk # ... add to secondary-vn-2006 # TODO: Finish mounts (vn66:/eminem vn67:/xtina), inform Steve # ... per elvis # As root@vn65 cat /etc/exports ls -lt /elvis # As root@vn66 mkdir -p /eminem # Nothing to install # As root@vn67 mkdir -p /xtina cd /etc scp root@vn65:/etc/exports . vi exports /xtina vn*.physics.ubc.ca(rw,async,no_root_squash) bh*.physics.ubc.ca(rw,async,no_root_squash) cd /d/vnfe4/home/xtina-2005 cp -a * /xtina # As root@head cd /home/xtina-2005 du -hs * 4.0k Usage 25G backup 4.7M bin 38M etc 5.1M home 4.0k misc 4.0k opt 96M var 28G xtina # As root@vn67 cd /xtina foreach i (`iota 1000`) date du -hs * echo sleep 60 end foreach p (/xtina) cd $p find . -uid 506 -exec chown -R alena.plotkin {} \; find . -uid 514 -exec chown -R matthias.plotkin {} \; find . -uid 500 -exec chown -R plotkin.plotkin {} \; find . -uid 502 -exec chown -R ejtehadi.plotkin {} \; find . 
-uid 510 -exec chown -R gusev.plotkin {} \; end # Modify scripts # vnN # vnDistEtc viw vnN viw vnDistEtc vnDistEtc csh.cshrc # OK vnDistEtc passwd shadow group hosts.allow hosts.deny hosts etc # vi fstab.node vn63:/mariah /mariah nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vn64:/whitney /whitney nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vn65:/elvis /elvis nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vn66:/eminem /eminem nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vn67:/xtina /xtina nfs rw,bg,rsize=8192,wsize=8192,hard,intr 0 0 vi fstab.vnfe[13] vnallbgCommand 'mkdir -p /eminem /xtina' etc make fstab # As root@vn66 vi /etc/exports exportfs -av # As root@vn67 vi /etc/exports exportfs -av # eminem (vn66) not coming back after reboot Wed Feb 22 15:55:39 PST 2006 # because of fstab.node GOTCHA! (again, no less) Disable!! # As matt@vnfe1 etc viM # ... touch fstab.node make fstab echo "This target no longer exists, due to issues with clobbering the sys part of /etc/fstab" This target no longer exists, due to issues with clobbering the sys part of /etc/fstab vn65 /dev/hda1 / ext3 defaults 1 1 /dev/hda5 swap swap defaults 0 0 /dev/hda6 /scratch ext3 defaults 1 2 # Fixed /etc/fstab's via Rescue option off installation disk # As root@vn66 umount -a -t nfs -l; df; echo; mount -a; df Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 191264348 6460368 175088272 4% / mount: none already mounted or /dev/pts busy mount: backgrounding "vn63:/mariah" mount: backgrounding "vn64:/whitney" mount: backgrounding "vn65:/elvis" mount: vnfe1:/usr/local already mounted or /d/vnfe1/usr/local busy mount: vnfe1:/opt already mounted or /d/vnfe1/opt busy mount: vnfe1:/home already mounted or /d/vnfe1/home busy mount: vnfe1:/home2 already mounted or /d/vnfe1/home2 busy mount: vnfe1:/home3 already mounted or /d/vnfe2/home busy df Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 114349864 74574616 33966548 69% / vnfe1:/usr/local 6040288 3723616 2009832 65% 
/d/vnfe1/usr/local vnfe1:/opt 6040288 3723616 2009832 65% /d/vnfe1/opt vnfe1:/home 10958176 10342320 615856 95% /d/vnfe1/home vnfe1:/home2 17496688 16585776 910912 95% /d/vnfe1/home2 vnfe1:/home3 17496688 9310128 8186560 54% /d/vnfe2/home vnfe3:/home 10958176 8672792 2285384 80% /d/vnfe3/home vnfe3:/home2 17066304 14226424 1951088 88% /d/vnfe3/home2 vnfe4:/home 1014438800 832488632 130419672 87% /d/vnfe4/home bh0:/home 149336320 125115536 16634896 89% /d/bh0/home # AS root@vn67 umount -a -t nfs -l; df; echo; mount -a; df Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 114349864 74574616 33966548 69% / mount: none already mounted or /dev/pts busy mount: backgrounding "vn63:/mariah" mount: backgrounding "vn64:/whitney" mount: backgrounding "vn65:/elvis" mount: vn66:/eminem failed, reason given by server: Permission denied Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 114349864 74574616 33966548 69% / vnfe1:/usr/local 6040288 3723632 2009824 65% /d/vnfe1/usr/local vnfe1:/opt 6040288 3723632 2009824 65% /d/vnfe1/opt vnfe1:/home 10958176 10342320 615856 95% /d/vnfe1/home vnfe1:/home2 17496688 16585816 910872 95% /d/vnfe1/home2 vnfe1:/home3 17496688 9310128 8186560 54% /d/vnfe2/home vnfe3:/home 10958176 8672800 2285376 80% /d/vnfe3/home vnfe3:/home2 17066304 14226424 1951088 88% /d/vnfe3/home2 vnfe4:/home 1014438800 832488672 130419632 87% /d/vnfe4/home bh0:/home 149336320 125115536 16634896 89% /d/bh0/home mount: vnfe3:/home already mounted or /d/vnfe3/home busy mount: vnfe3:/home2 already mounted or /d/vnfe3/home2 busy mount: vnfe4:/home already mounted or /d/vnfe4/home busy Wed Feb 22 16:17:37 PST 2006 # OK, with luck, this time (4th?) the copy-back from vnfe4 # should go through ... 
will leave running on console@vn # As root@vn67 cd /xtina RM -r * cd /d/vnfe4/home/xtina-2005 cp -a * /xtina & # TARGET cd /home/xtina-2005 du -hs * 4.0k Usage 25G backup 4.7M bin 38M etc 5.1M home 4.0k misc 4.0k opt 96M var 28G xtina # TODO: Service ntpd start # Starting ntpd: ntpd: unknown user ntp [FAILED] # TODO: Problem with mounting eminem's stuff, DNS issue?? /xtina/xtina copy stalled out --- retry # Need to move backup out of /d/vnfe4/xtina-home- # As root@vnfe4 mkdir xtina-2005-backup cd xtina-2005 mv backup ../xtina-2005-backup du -hs /home/xtina-2005 # As root@vn67 cd /xtina RM -r * df -h /xtina Filesystem Size Used Avail Use% Mounted on /dev/hda5 110G 59G 45G 57% / # AS root@vnfe4 du -hs /home/xtina-2005 28G /home/xtina-2005 # Good to go # As root@vn67 cd /xtina cp -a /d/vnfe4/home/xtina-2005/* . # As root@vnfe4 cd /home/xtina-2005 du -hs * 4.0k Usage 4.7M bin 38M etc 5.1M home 4.0k misc 4.0k opt 96M var 28G xtina # As root@vn67 cd /xtina foreach i (`iota 1000`) date du -hs * echo sleep 60 end ############################################################ Wed Feb 22 12:23:00 PST 2006 ############################################################ # New account for Peter Kostka, student (??) 
in Bushe's # group From peterlkostka@yahoo.com Wed Feb 22 12:17:34 2006 Date: Wed, 22 Feb 2006 11:58:41 -0800 (PST) From: Peter Kostka To: choptuik@physics.ubc.ca Subject: Account Application For VNP4 1) Full Name: Peter L Kostka 2) Preferred Login Name: pkostka 3) Alternate Login Name (if preferred unavailable/not allowed): plkostka 4) Group (see notes below): Bushe 5) Preferred e-mail:pkostka@interchange.ubc.ca 6) Contact Phone Number:822 2279 or 778-895-0948 7) Preferred Shell (see notes below):bash Thanks Peter vi README.USERS 1256 pkostka # Peter L Kostka nu cat<pkostka pkostka:x:1622:1600:Peter L Kostka:/d/vnfe3/home/pkostka:/bin/bash END vnNewUsers pkostka ssh pkostka@vnfe1 date # OK ssh pkostka@vn35 date # OK etc sola; vs # $1$rlOgQuft$jWYUbXgvmey71tOxIDwmj1 vnDistEtc shadow ssh root@vnfe1 'cd ~pkostka; echo pkostka@interchange.ubc.ca > .forward; chown pkostka.bushe .forward; ls -al; cat .forward' # DUPLICATE ACCOUNT ON vnfe4 # As matt@vnfe1 nu cd Blurbs cp chemming pkostka vi pkostka Rcp pkostka # Send message, with CC to Kendal ############################################################ Fri Feb 24 12:06:16 PST 2006 ############################################################ # Post-de-Plotkinizing of vn66 (eminem) and vn67 (xtina) # As matt@vnfe1 vnDistEtc motd vnallbgCommand 'mkdir -p /scratch && chmod a+rxw /scratch' ############################################################ Sun Feb 26 11:51:20 PST 2006 ############################################################ # New account for Christopher Yearwood, PHAS/UG/Plotkin # As choptuik@physics finger yearwood Login name: cyearwoo In real life: Christopher Yearwood Directory: /home2/cyearwoo Shell: /bin/tcsh sudo pwentry cyearwoo:x:13738:400:Christopher Yearwood:/home2/cyearwoo:/bin/tcsh cyearwoo:kN53qEIB3WTJw:12691::::: vi README.USERS 13738 cyearwoo # Christopher Yearwood nu cat<cyearwoo cyearwoo:x:13738:9200:Christopher Yearwood:/d/vnfe1/home/cyearwoo:/bin/tcsh END vnNewUsers cyearwoo ssh 
cyearwoo@vnfe1 date # OK ssh cyearwoo@vn35 date # OK # As matt@vnfe1 etc sola; vs # kN53qEIB3WTJw vnDistEtc shadow ssh root@vnfe1 'cd ~cyearwoo; echo cyearwoo@physics.ubc.ca > .forward; chown cyearwoo.plotkin .forward; ls -al; cat .forward' finger cyearwoo # DUPLICATED ACCOUNT ON vnfe4 # As matt@vnfe1 nu cd Blurbs cp chemming cyearwoo vi cyearwoo Rcp cyearwoo # TODO: Send message, with CC to Steve ############################################################ Fri Mar 17 12:26:13 PST 2006 ############################################################ # (From the Seoul ofc) # New account for Tom Depew, PHAS/GS/Michal PHAS 555 # As choptuik@physics finger tomdepew sudo pwentry tomdepew:x:825:307:Tom Depew:/home2/tomdepew:/bin/tcsh tomdepew:S.YxGXwJkZhxk:13019:::::: vi README.USERS 825 tomdepew # Tom Depew (UBC PHAS GS Michal) nu cat<tomdepew tomdepew:x:825:9000:Tom Depew:/d/vnfe1/home/tomdepew:/bin/tcsh END vnNewUsers tomdepew ssh tomdepew@vnfe1 date # OK ssh tomdepew@vn35 date # OK # As matt@vnfe1 etc sola; vs # S.YxGXwJkZhxk vnDistEtc shadow ssh root@vnfe1 'cd ~tomdepew; echo tomdepew@physics.ubc.ca > .forward; chown tomdepew.other .forward; ls -al; cat .forward' finger tomdepew # DUPLICATED ACCOUNT ON vnfe4 # As matt@vnfe1 nu cd Blurbs cp chemming tomdepew vi tomdepew Rcp tomdepew # TODO: Send message, with CC to Martin S and Carl ############################################################ Thu May 11 14:48:29 PDT 2006 ############################################################ # Update resolv.conf / hosts on all nodes to reflect the # change in the UBC backup DNS server # As matt@vnfe1 etc sola get resolv.conf get hosts vnDistEtc hosts resolv.conf vnallCommand 'grep 137 /etc/resolv.conf' | tee /tmp/resolv-conf-137 vnallCommand 'grep 137.82.28 /etc/resolv.conf' | tee /tmp/resolv-conf-137-82-28 ############################################################ Thu May 18 15:15:34 PDT 2006 ############################################################ # Fire alarm over in PHAS, 
taking opportunity to reboot/diagnose # vn24 (Choptuik/Bushe) # vn62 (Plotkin) # vn66 (Plotkin) # First, define and distribute a unfs 'umount -a -t nfs -l; echo "Sleeping for an extra 2 seconds for good measure"; sleep 2; echo "Executing .df. ... Pray to your favorite deity here, please ..."; df' # As matt@vnfe1 etc make import vi .aliases vnallCommand 'cd; Arc .aliases; /bin/cp /d/vnfe1/home/matt/system/vn/image/master/etc/.aliases .; source .aliases; a unfs' | tee /tmp/aliases # vn24 ping vn24 # DEAD # TODO: Diagnose: P/S, get on KVM etc # Looks like PS. TODO: Advise Jason, and ask him to investigate # vn62 ping vn62 # TODO: Doesn't have 'rwhod' and god knows what else, but then again, # idiot, vn62 is aka sting!! # vn66 ping vn66 # DEAD # Rebooting, hard reboot since stalls on NFS unmount # And really AM getting senile, since vn66's cable is unplugged due # to the fact that the switch is full # TODO: Extract line card from HP Switch in Henn 403, and install over # here # ############################################################ Sat May 20 11:06:52 PDT 2006 ############################################################ # vn24 and vn66 both back on line, vn62 (sting) still # awaiting secondary etc. ############################################################ Tue May 23 14:59:54 PDT 2006 ############################################################ # vnfe1:/home2 was completely filled this afternoon # Have moved # /d/vnfe1/home2/alistair -> /d/vnfe4/home/alistair/alistair ############################################################ Thu May 25 07:30:38 PDT 2006 ############################################################ # Maggie reported (weeks ago!) 
problem with xv on vnfe3, # apparently on all machines, >>> Executing as root@142.103.237.1 linux-gate.so.1 => (0xffffe000) libX11.so.6 => /usr/X11R6/lib/libX11.so.6 (0x40029000) libjpeg.so.62 => /usr/lib/libjpeg.so.62 (0x400f4000) libpng.so.2 => not found libz.so.1 => /lib/libz.so.1 (0x40113000) libm.so.6 => /lib/tls/libm.so.6 (0x40125000) libc.so.6 => /lib/tls/libc.so.6 (0x40148000) libdl.so.2 => /lib/libdl.so.2 (0x40267000) /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000) # As matt@head ldd `which xv` libX11.so.6 => /usr/X11R6/lib/libX11.so.6 (0x4002a000) libjpeg.so.62 => /usr/lib/libjpeg.so.62 (0x40100000) libpng.so.2 => /usr/lib/libpng.so.2 (0x4011e000) libz.so.1 => /usr/lib/libz.so.1 (0x40140000) libm.so.6 => /lib/i686/libm.so.6 (0x4014e000) libc.so.6 => /lib/i686/libc.so.6 (0x42000000) libdl.so.2 => /lib/libdl.so.2 (0x40170000) /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000) rpm -qf /usr/lib/libpng.so libpng-devel-1.0.14-0.7x.3 # As root@vnfe3 umount /mandrake mount /mandrake # TODO: Resolve /mandrake mounting problem cat /etc/issue # --> cluster running 10.1 # As root@bh0 cd /mandrake/Mandrake101/i586 f png | grep devel ./media/main/libpng3-static-devel-1.2.6-2mdk.i586.rpm ./media/main/libpng3-devel-1.2.6-2mdk.i586.rpm Rcp ./media/main/libpng3-devel-1.2.6-2mdk.i586.rpm scp root@bh0.physics.ubc.ca:/mandrake/Mandrake101/i586/media/main/libpng3-devel-1.2.6-2mdk.i586.rpm . # As matt@vnfe1 cd RPMS scp root@bh0.physics.ubc.ca:/mandrake/Mandrake101/i586/media/main/libpng3-devel-1.2.6-2mdk.i586.rpm . vnallCommand 'rpm -ivh /d/vnfe1/home/matt/RPMS/libpng3-devel-1.2.6-2mdk.i586.rpm' # Need libpng cd RPMS scp matt@bh0.physics.ubc.ca:/mandrake/Mandrake101/i586/media/main/libpng3-1.2.6-2mdk.i586.rpm . vnallbgCommand 'rpm -ivh /d/vnfe1/home/matt/RPMS/*png*rpm' ############################################################ Fri May 26 13:28:13 PDT 2006 ############################################################ # Steve reports various and sundry problems ... 
Fri May 26 13:28:00 PDT 2006 From steve@phas.ubc.ca Fri May 26 13:27:38 2006 Date: Thu, 25 May 2006 19:32:17 -0700 From: Steven Samuel Plotkin To: Matthew Choptuik Subject: problems w vn64 - many questions, sorry Hi Matt, I am having problems getting files over to vn64, and using editors, so I can run some jobs. It does not seem to be mounted to vnfe1? I have to copy them directly over from my desktop rather than through vnfe1 (presumably these problems hold for my other computers on the cluster as well?): [plotkin@vnfe1 1Doscillator]$ ssh vn64 ... Could not chdir to home directory /d/vnfe1/home/plotkin: No such file or directory /usr/X11R6/bin/xauth: error in locking authority file /d/vnfe1/home/plotkin/.Xauthority from vnfe1: [plotkin@vnfe1 1Doscillator]$ pwd /d/vnfe1/home/plotkin/DistSpaceCurves/NumericalAnalysis/1Doscillator [plotkin@vnfe1 1Doscillator]$ scp -rp * plotkin@vn64.physics.ubc.ca:/whitney/home/steve/DistSpaceCurves/NumericalAnalysis/1Doscillator/ plotkin@vn64.physics.ubc.ca's password: Could not chdir to home directory /d/vnfe1/home/plotkin: No such file or directory [plotkin@vnfe1 1Doscillator]$ scp amebsa-1Dosc.c vn64: plotkin@vn64's password: Could not chdir to home directory /d/vnfe1/home/plotkin: No such file or directory scp: ./amebsa-1Dosc.c: Permission denied [plotkin@vnfe1 1Doscillator]$ I am also noticing that on vn64 I cannot use elementary commands like: -bash-3.00$ cd -bash: cd: /d/vnfe1/home/plotkin: No such file or directory and -bash-3.00$ emacs -bash: emacs: command not found -bash-3.00$ whereis emacs emacs: /etc/emacs /usr/local/emacs /usr/share/emacs (*these are directories not executables*) -bash-3.00$ locate emacs warning: locate: could not open database: /var/lib/slocate/slocate.db: Permission denied warning: You need to run the 'updatedb' command (as root) to create the database. If I cant use emacs on whitney and I cant directly scp files over from vnfe1 it makes it very difficult to work. 
I am not sure what directory I am supposed to be working in on vn64- is it /whitney/home/steve/? If I cd over to mariah and eminem (on vn64) am I then supposed to be working on those machines? (i.e. were they supposed to be mounted) Those directories seem to be empty: -bash-3.00$ cd mariah/ -bash-3.00$ ls -bash-3.00$ mkdir steve mkdir: cannot create directory `steve': Permission denied -bash-3.00$ cd .. -bash-3.00$ cd eminem/ -bash-3.00$ ls -bash-3.00$ mkdir steve mkdir: cannot create directory `steve': Permission denied Are the gnu scientific libraries are not installed on the cluster? In any event, I am not sure what the easiest way to start working on my machines is. Thanks and sorry for the abundance of questions, Steve # As matt@vnfe1 viw vnNsteve #142.103.237.62 via a vns 'vnsteveCommand \!* | tee /tmp/steve-\!*.txt; more /tmp/steve-\!*.txt; Rcat /tmp/steve-\!*.txt' vns date Warning: Permanently added 'vnfe1.physics.ubc.ca' (RSA) to the list of known hosts. >>> Executing as root@142.103.237.63 Fri May 26 13:33:16 PDT 2006 >>> Executing as root@142.103.237.64 Fri May 26 13:33:33 PDT 2006 >>> Executing as root@142.103.237.65 Fri May 26 13:32:44 PDT 2006 >>> Executing as root@142.103.237.66 Fri May 26 13:33:00 PDT 2006 >>> Executing as root@142.103.237.67 Fri May 26 13:32:57 PDT 2006 vns umount -a -t nfs -l # As matt@vnfe1 etc Arc passwd shadow scp root@vnfe1:/etc/passwd . scp root@vnfe1:/etc/shadow . 
vnsteveallbgCommand 'mount -a -t nfs' vnsteveCommand 'cd ~steve; pwd; ls' >>> Executing as root@142.103.237.63 /d/vnfe1/home/steve bin/ gr_master.tar pix/ PRL31101.pdf tmp/ gr_master/ p6235_1 PRD44007.pdf scripts/ >>> Executing as root@142.103.237.64 /d/vnfe1/home/steve bin/ gr_master.tar pix/ PRL31101.pdf tmp/ gr_master/ p6235_1 PRD44007.pdf scripts/ >>> Executing as root@142.103.237.65 /d/vnfe1/home/steve bin/ gr_master.tar pix/ PRL31101.pdf tmp/ gr_master/ p6235_1 PRD44007.pdf scripts/ >>> Executing as root@142.103.237.66 /d/vnfe1/home/steve bin/ gr_master.tar pix/ PRL31101.pdf tmp/ gr_master/ p6235_1 PRD44007.pdf scripts/ >>> Executing as root@142.103.237.67 /d/vnfe1/home/steve bin/ gr_master.tar pix/ PRL31101.pdf tmp/ gr_master/ p6235_1 PRD44007.pdf scripts/ # But of course, Steve P is 'plotkin'. NOT steve vnsteveCommand 'cd ~plotkin; pwd; ls' # OK # As root@vnfe1 # Set up so matt@vnfe1 can ssh steve@vnfe1 -bash: TMOUT: readonly variable # ... being generated from /etc/profile.d/msec.sh # As matt@vnfe1 etc mkdir -p profile.d cd !$ scp root@vnfe1:/etc/profile.d/msec.sh . vi msec.sh vnallbgCommand 'cd /etc/profile.d; CP /d/vnfe1/home/matt/system/vn/image/master/etc/profile.d/msec.sh .' # As root@vnfe1 cd /etc/profile.d scp msec.sh root@vn62:`pwd` # As matt@vnfe1 a d 'ssh plotkin@vn\!* date' d63 d64 d65 d66 d67 # OK # emacs NOT installed on all of Steve's machines # As root@vn1 rpm -qf `which emacs` vnsteveCommand 'cd /mandrake; ls' # TODO: Get /mandrake mounting on these machines, get # rpm via bh0 # As matt@bh0 mkdir -p /d/vnfe1/home/matt/RPMS/10.1/EMACS cd /mandrake/Mandrake101/i586/media/main cp *emacs*rpm /d/vnfe1/home/matt/RPMS/10.1/EMACS # As matt@vnfe1 vnsteveCommand 'cd /d/vnfe1/home/matt/RPMS/10.1/EMACS; rpm --nodeps -ivh *' # Nope, too heavy-handed vnsteveCommand 'cd /d/vnfe1/home/matt/RPMS/10.1/EMACS; rpm --nodeps -ivh emacs-21.3-15mdk.i586.rpm xemacs-21.4.15-5mdk.i586.rpm' # TODO: Finish this up.
############################################################ Sat May 27 08:20:42 PDT 2006 ############################################################ # Update /etc/motd # As matt@vnfe1 etc cp motd.2006.05.23 motd.2006.05.27 vi !$ CP !$ motd vnDistEtc motd ############################################################ Thu Jun 1 08:49:38 PDT 2006 ############################################################ # Name/account swaperoo # steve -> liebling # plotkin -> steve # As matt@vnfe1 etc make import grep steve /etc/{passwd,shadow} /etc/passwd:steve:!:611:600:Steve Liebling:/d/vnfe1/home/steve:/bin/tcsh /etc/shadow:steve:yEmUKu1Xevoos:10910:0:99999:7::: grep steve {passwd,shadow} passwd:steve:!:611:600:Steve Liebling:/d/vnfe1/home/steve:/bin/tcsh shadow:steve:yEmUKu1Xevoos:10910:0:99999:7::: grep plotkin /etc/{passwd,shadow} /etc/passwd:plotkin:x:412:9200:Steven Plotkin:/d/vnfe1/home/plotkin:/bin/bash /etc/shadow:plotkin:$1$jQYiWpd3$4QopjBKVLmzlfeByuxfBK.:11585:0:99999:7:::134542192 grep plotkin {passwd,shadow} passwd:plotkin:x:412:9200:Steven Plotkin:/d/vnfe1/home/plotkin: shadow:plotkin:$1$jQYiWpd3$4QopjBKVLmzlfeByuxfBK.:11585:0:99999 vi {passwd,shadow} # As root@vnfe1 mv /d/vnfe1/home/steve /d/vnfe1/home/liebling mv /d/vnfe1/home/plotkin /d/vnfe1/home/steve # Send Steve L a message # As matt@bh0 etcc2 make import vi {passwd,shadow} # steve -> liebling # plotkin -> steve c2DistEtc passwd shadow # As root@vnfe4 mv /d/vnfe4/home/steve /d/vnfe4/home/liebling mv /d/vnfe4/home/plotkin /d/vnfe4/home/steve # OK ############################################################ Sun Jun 4 15:01:09 PDT 2006 ############################################################ # Back in machine room with power on and, frustratingly, no # experience in reconnecting the NEMA-?? connectors for the # UPSes in the new cluster. 
# So will tend to old cluster first # vnfe1, vnfe3 up # Others powered up vnCommand date vnCommand date | tee /tmp/vn-date # Had to hack on node /etc/fstab file (comment out vnfe1:/usr/local etc.) vnNbgCommand 'cd /etc; Arc fstab; scp -q matt@vnfe1.physics.ubc.ca:/d/vnfe1/home/matt/system/vn/image/master/etc/fstab.node fstab; mount -a; df' vnCommand 'vnSetdate; ntptimeset' | tee /tmp/vn-time # Back to figuring out NEMA connector issue # UPSes are Tripp.Lite SMART3000RM2U with # NEMA L5-30P # connectors # Still having problems with the connectors! Embarrassing! # Actually, of course, it's the OUTPUTs to the PDU's which are disconnected, # but the outputs are L5-30 as well, plugs to the PDUs are L5-20 # Give up!! Sun Jun 4 17:17:35 PDT 2006 # Trip to Home Depot very profitable ... became clear that I had been trying # to mate two incompatible things (of course), the L5-20 socket is at the # top with the "regular" AC outlets # All machines up, but video via KVM screwed up again # TODO: Fix properly myself TODO TODO # As root@vnfe4 fornodes uptime | tee /tmp/vnp4-uptime ==================== node001 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node002 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.03, 0.00 ==================== node003 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node004 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node005 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node006 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node007 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node008 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node009 ==================== 5:19pm up 5
min, 0 users, load average: 0.01, 0.03, 0.00 ==================== node010 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node011 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node012 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node013 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node014 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.04, 0.01 ==================== node015 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.03, 0.00 ==================== node016 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node017 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node018 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node019 ==================== 5:19pm up 5 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node020 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node021 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node022 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node023 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node024 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node025 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node026 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node027 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node028 
==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node029 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node030 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node031 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node032 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node033 ==================== 5:19pm up 6 min, 0 users, load average: 0.01, 0.04, 0.01 ==================== node034 ==================== 5:19pm up 6 min, 0 users, load average: 0.09, 0.04, 0.01 ==================== node035 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node036 ==================== 5:19pm up 6 min, 0 users, load average: 0.02, 0.04, 0.01 ==================== node037 ==================== 5:19pm up 6 min, 0 users, load average: 0.01, 0.03, 0.00 ==================== node038 ==================== ==================== node039 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node040 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node041 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node042 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node043 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node044 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.03, 0.01 ==================== node045 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.00, 0.00 ==================== node046 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.00, 0.00 ==================== node047 ==================== 
5:19pm up 6 min, 0 users, load average: 0.00, 0.00, 0.00 ==================== node048 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.03, 0.00 ==================== node049 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node050 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node051 ==================== 5:19pm up 6 min, 0 users, load average: 0.08, 0.05, 0.01 ==================== node052 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node053 ==================== 5:19pm up 6 min, 0 users, load average: 0.08, 0.04, 0.01 ==================== node054 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.02, 0.00 ==================== node055 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== node056 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.03, 0.01 ==================== node057 ==================== 5:19pm up 6 min, 0 users, load average: 0.00, 0.01, 0.00 ==================== store ==================== 5:19pm up 7 min, 0 users, load average: 0.00, 0.00, 0.00 # Looks good to go ... 
# As matt@vnfe4 cdex make clean make Scan node001 # reveals that there is some NFS mounting to be done fornodes 'umount -a -t nfs -l; mount -a; df' | tee /tmp/vnp4-nfs ==================== node001 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 931764 4944432 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 161028 1797684 9% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 931764 4944432 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node002 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911932 4964264 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 461252 1497460 24% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911932 4964264 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node003 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 147912 1810800 8% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node004 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 912116 4964080 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 120424 1838288 7% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 912116 4964080 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node005 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 
81312 1877400 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node006 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 774468 1184244 40% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node007 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 815700 5060496 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 394304 1564408 21% /var /dev/hda4 66602516 32876 63186352 1% /var/scratch store:/home2 6190664 815700 5060496 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node008 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 159664 1799048 9% /var /dev/hda4 66602516 32876 63186352 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node009 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 95192 1863520 5% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node010 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 815708 5060488 14% / none 
1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 84340 1874372 5% /var /dev/hda4 66602516 32876 63186352 1% /var/scratch store:/home2 6190664 815708 5060488 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node011 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 90708 1868004 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node012 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 77424 1881288 4% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node013 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 103560 1855152 6% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node014 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 819808 5056388 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 107496 1851216 6% /var /dev/hda4 66602516 32832 63186396 1% /var/scratch store:/home2 6190664 819808 5056388 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node015 ==================== Filesystem 1k-blocks Used Available Use% Mounted on 
/dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 90084 1868628 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node016 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 112032 1846680 6% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node017 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 79304 1879408 5% /var /dev/hda4 66602516 32876 63186352 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node018 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 81280 1877432 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node019 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 105992 1852720 6% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node020 ==================== 
Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 912044 4964152 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 94392 1864320 5% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 912044 4964152 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node021 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 913324 4962872 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 109472 1849240 6% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 913324 4962872 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node022 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 823780 5052416 15% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 1983876 0 100% /var /dev/hda4 66602516 32876 63186352 1% /var/scratch store:/home2 6190664 823780 5052416 15% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node023 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911940 4964256 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 1985116 0 100% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911940 4964256 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node024 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 86756 1871956 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home 
==================== node025 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 931724 4944472 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 66308 1892404 4% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 931724 4944472 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node026 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 815712 5060484 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 125112 1833600 7% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 815712 5060484 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node027 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 90568 1868144 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node028 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911924 4964272 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 92328 1866384 5% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911924 4964272 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node029 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 219696 1739016 12% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt 
head:/home 1014438800 789258568 173649744 82% /home ==================== node030 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 832292 5043904 15% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 123356 1835356 7% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 832292 5043904 15% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node031 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911996 4964200 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 427872 1530840 22% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911996 4964200 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node032 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 85380 1873332 5% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node033 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 815704 5060492 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 198844 1759868 11% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 815704 5060492 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node034 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 110612 1848100 6% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 911920 4964276 16% 
/home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node035 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911928 4964268 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 1989896 0 100% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911928 4964268 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node036 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 100644 1858068 6% /var /dev/hda4 66602516 32884 63186344 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node037 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 912564 4963632 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 90776 1867936 5% /var /dev/hda4 66602516 32880 63186348 1% /var/scratch store:/home2 6190664 912564 4963632 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node038 ==================== ==================== node039 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911896 4964300 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 100412 1858300 6% /var /dev/hda4 66602516 32896 63186332 1% /var/scratch store:/home2 6190664 911896 4964300 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node040 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911936 4964260 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 132064 1826648 7% 
/var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911936 4964260 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node041 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911888 4964308 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 116372 1842340 6% /var /dev/hda4 66602516 32896 63186332 1% /var/scratch store:/home2 6190664 911888 4964308 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node042 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911892 4964304 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 127140 1831572 7% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911892 4964304 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node043 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911892 4964304 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 162820 1795892 9% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911892 4964304 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node044 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911892 4964304 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 577244 1381468 30% /var /dev/hda4 66602516 32896 63186332 1% /var/scratch store:/home2 6190664 911892 4964304 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node045 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911888 4964308 16% / none 1032416 0 1032416 
0% /dev/shm /dev/hda2 2063536 109716 1848996 6% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 911888 4964308 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node046 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911892 4964304 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 145972 1812740 8% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 911892 4964304 16% /home2 head:/home 1014438800 789258568 173649744 82% /home head:/opt 6190664 4218040 1658152 72% /opt ==================== node047 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911892 4964304 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 135284 1823428 7% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 911892 4964304 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node048 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911888 4964308 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 134376 1824336 7% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 911888 4964308 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node049 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911884 4964312 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 121852 1836860 7% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911884 4964312 16% /home2 head:/home 1014438800 789258568 173649744 82% /home head:/opt 6190664 4218040 1658152 72% /opt ==================== node050 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 
6190664 911920 4964276 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 418792 1539920 22% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 911920 4964276 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node051 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 907872 4968324 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 136660 1822052 7% /var /dev/hda4 66602516 32888 63186340 1% /var/scratch store:/home2 6190664 907872 4968324 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node052 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911876 4964320 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 157612 1801100 9% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 911876 4964320 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node053 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911888 4964308 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 174824 1783888 9% /var /dev/hda4 66602516 32896 63186332 1% /var/scratch store:/home2 6190664 911888 4964308 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node054 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 911884 4964312 16% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 322260 1636452 17% /var /dev/hda4 66602516 32896 63186332 1% /var/scratch store:/home2 6190664 911884 4964312 16% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node055 ==================== Filesystem 
1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 807624 5068572 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 59808 1898904 4% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 807624 5068572 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node056 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 807628 5068568 14% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 58952 1899760 4% /var /dev/hda4 66602516 32892 63186336 1% /var/scratch store:/home2 6190664 807628 5068568 14% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== node057 ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/hda1 6190664 715640 5160556 13% / none 1032416 0 1032416 0% /dev/shm /dev/hda2 2063536 58060 1900652 3% /var /dev/hda4 66602516 32832 63186396 1% /var/scratch store:/home2 6190664 715640 5160556 13% /home2 head:/opt 6190664 4218040 1658152 72% /opt head:/home 1014438800 789258568 173649744 82% /home ==================== store ==================== Filesystem 1k-blocks Used Available Use% Mounted on /dev/sda1 6190664 1472736 4403460 26% / /dev/sda4 462399808 388486188 50424984 89% /home2 none 515340 0 515340 0% /dev/shm /dev/sda2 2063536 74280 1884432 4% /var head:/home 1014438800 789258568 173649744 82% /home head:/opt 6190664 4218040 1658152 72% /opt # OK cdex make clean make cpi Scan node001 | tee /tmp/vnp4-mpi-test # Nope # As root@head fornodes 'service gm restart' service gm-mapper restart pbsfix # OK # Disable node038 from /opt/.nodes # TODO: reenable node038 # As root@head cd /opt ############################################################ Mon Jun 5 06:36:28 PDT 2006 ############################################################ # TODO: New accounts for Kunal and Deepak, Frans's summer # students from India From 
fransp@phys.ualberta.ca Mon Jun 5 06:35:36 2006 Deepak: dkhurana@cita.utoronto.ca and Deepak.Khurana@iitkgp.ac.in Kunal: krajani@cita.utoronto.ca and kunalrajani@iitb.ac.in I'll have them set-up their ssh stuff tomorrow and send the keys to you. ############################################################ Mon Jun 5 06:43:05 PDT 2006 ############################################################ # 4 of Steve's machines are down vn62 down 240+02:41 vn63 down 3+20:45 vn64 down 1+20:20 vn65 down 3+20:39 ############################################################ Tue Jun 6 01:50:36 PDT 2006 ############################################################ # Create 'pretoriu' group, and create accounts for Frans 2 # summer students from the subcontinent # # TODO: chgrp on frans's stuff # As matt@vnfe1 etc make import vi group # Add 'pretoriu' pretoriu::660: vnDistEtc group # As root@vnfe1 chgrp -R pretoriu ~fransp ls -ltd ~fransp From krajani@cita.utoronto.ca Tue Jun 6 01:56:09 2006 Date: Mon, 5 Jun 2006 16:58:00 -0400 (EDT) From: Kunal Rajani To: matt@bh0.physics.ubc.ca Subject: ssh key Hi Matt, Frans told us that you would be setting up a CVS repository for our projects and you would allot us some webspace. Here is my public rsa id for CVS. 
Regards, Kunal Rajani ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAnoJVo3mBNKNd2zEsnXpmK3IJDiVNlgPdVYAzcQDB/YH9BQPQGllc6Oz7Dfr2db0g2LIfHzXaN7wJQckDqsFwPKid4JR4Q/Yi0712fN3tXc8tiQrgZ/eD/4c7cRaLrb8nkBZTSEkInFeKGpkPD0Vj/W7ZKSXC3P7qM8tVHbLE+SBPvu6lQdmf3G5oLnoqLK8hQZHVsB4GIsWf590s8K8OV6NiIchRd33xpKGUOg1MpJls4rxWr5BjlNArAplpyQqlvR8yXnWsQ61ulqbN50rYsh2ObWStiNlEpJXxwEi3czLqM6sMZdXvfCJgjsgGGMB/mPLjjiSk9WXeT7FfCVFeEQ== krajani@bat # ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAxcmictZRvIxWe6R8WMTvHr2nxpTkTiqE4RyucZVhjPsC3s6FpCX/DwCE8l+pwEbt2Sep/KekN8YAtATq5BZUUTvaP1XrAUuPzFQTtrRcysyLsuFq35QVTtkZ8DmmMwMFB8icDlBpwfmHWytRdV0Tri5O8fZtyKSPIbBizpLywRaSXZ+hbnO3nACkzoKzpi/AgZ+PFfZbj69A5R5FQYdwOf8/evnl7vzzEX2DkVs148w0okp7jn48s9fOEJ8hsHgX0XnAW61CqrxWT79Y8LpQlkeR1uvT39c0m1zptbnViFaTKZN+f/pKGJAjMvOOFzpKNCk9tgANX8ppzzmd3ObYzw== dkhurana@horse # As krajani:x:10000:100:Kunal Rajani:/d/bh9/home/krajani:/bin/tcsh dkhurana:x:10001:100:Deepak Khurana:/d/bh9/home/dkhurana:/bin/tcsh nu cat<krajani krajani:x:10000:660:Kunal Rajani:/d/vnfe1/home/krajani:/bin/tcsh END vnNewUsers krajani ssh krajani@vnfe1 date # OK ssh krajani@vn35 date # OK # As root@vnfe1 passwd krajani # Set passwd # As matt@vnfe1 etc make import vnDistEtc shadow ssh krajani@vn35 date Deepak: dkhurana@cita.utoronto.ca and Deepak.Khurana@iitkgp.ac.in Kunal: krajani@cita.utoronto.ca and kunalrajani@iitb.ac.in ssh root@vnfe1 'cd ~krajani; echo kunalrajani@iitb.ac.in > .forward; chown krajani.pretoriu .forward; ls -al; cat .forward' finger krajani # Create authorized_keys and install id_rsa.pub # TODO: DUPLICATED ACCOUNT ON vnfe4 nu cat<dkhurana dkhurana:x:10001:660:Deepak.Khurana:/d/vnfe1/home/dkhurana:/bin/tcsh END vnNewUsers dkhurana ssh dkhurana@vnfe1 date # OK ssh dkhurana@vn35 date # OK # As root@vnfe1 passwd dkhurana # Set passwd # As matt@vnfe1 etc sola get shadow vnDistEtc shadow ssh dkhurana@vn35 date Deepak: dkhurana@cita.utoronto.ca and Deepak.Khurana@iitkgp.ac.in Kunal: krajani@cita.utoronto.ca and kunalrajani@iitb.ac.in ssh root@vnfe1 'cd 
~dkhurana; echo Deepak.Khurana@iitkgp.ac.in > .forward; chown dkhurana.pretoriu .forward; ls -al; cat .forward' finger dkhurana # Create authorized_keys and install id_rsa.pub # TODO: DUPLICATED ACCOUNT ON vnfe4 ############################################################ Fri Jun 9 12:49:23 PDT 2006 ############################################################ # Getting Steve's machines back up via boot disk, rescue, # and restoration of /etc/fstab vn65 # OK? No had network configured for 1000 rather than 100 # leave at 100 since switch is 100 OK # TODO: GET vn62, vn66, vn67 labelled # TODO: vn66 and vn67 TODO RESCUE NOT WORKING vn62 #TODO: vn63 #TODO: vn64 NOT BOOTING OFF MANDRIVA DISKS, HAVE JASON MAKE # TWO SETS OF BOOT FLOPPIES #TODO: vn66 #TODO: vn67 ############################################################ Sat Jun 10 08:37:22 PDT 2006 ############################################################ # In machine room preparing for video tour for gents in # Kamloops # Saturate vnfe1 and vnfe4 # As root@vnfe1 vnallbgCommand Lock # As root@vnfe4 c2allbgCommand Lock # Leave for 20 minutes or so for equilibration Sat Jun 10 08:39:06 PDT 2006 # ... upon return, lift floor tile # Co-lo floor space 9 x 12 tiles + 3 x 5 approx 125 tiles ############################################################ Tue Jun 13 19:43:03 PDT 2006 ############################################################ # Had forgotten to kill Lock jobs vnCommand 'killall Lock; killall Lock' vnCommand 'killall Lock; killall Lock' # OK ############################################################ Thu Jun 15 14:50:47 PDT 2006 ############################################################ # vnfe1 died during some "routine" cluster work a few days # ago, Tony now here with PS (+ 2 spare) and replacing PS. 
# Machine hasn't been opened in years, and has been running # for almost 7 years, and, not surprisingly, is extremely clean # ############################################################ Thu Jun 15 15:12:23 PDT 2006 ############################################################ # vnfe1 back on line after PS replacement by Tony @ Varsity # As root@vnfe1 vnCommand 'mount -a; df' ntptimeset vnCommand 'df | grep vnfe1' | tee -a /tmp/df-vnfe1 # Looks OK # Update # # /etc/motd # http://bh0.phas.ubc.ca/VN # ############################################################ Thu Jun 15 15:12:23 PDT 2006 ############################################################ # Tom D reports problems logging in, will reset his vn pass # to physics # As choptuik@physics sudo pwentry tomdepew:x:825:307:Tom Depew:/home2/tomdepew:/bin/tcsh tomdepew:S.YxGXwJkZhxk:13019:::::: # As matt@vnfe1 etc sola # TODO ... Something severely fucked here. From tomdepew@phas.ubc.ca Fri Jun 16 07:11:01 2006 Date: Thu, 15 Jun 2006 22:07:57 -0700 (PDT) From: Tom Depew To: choptuik@phas.ubc.ca Subject: vnfe1 Hi Matt, You may still be in the process of fixing vnfe1 after it decided to take its unscheduled holiday, but I was trying to log on tonight and it now appears to be back online (it doesn't just hang after the ssh command) but it doesn't seem to like my password. Maybe it got reset or something after the crash. Just need to get my 555 assignments off there!! Tom Tom Depew | M.A.Sc. Physics | Henn. 
100 | tomdepew@phas.ubc.ca ------------------------------------------------------------------------ Carpe Diem ############################################################ Tue Jun 20 15:31:01 PDT 2006 ############################################################ # New accounts for # # Gungwon Kang # Changheon Oh # As root@bh0 grep gwkang /etc/{shadow,passwd} /etc/shadow:gwkang:RcKuEmJw4pmGQ:13319:-1:99999:-1::: /etc/passwd:gwkang:x:503:100:Gungwon Kang:/d/bh9/home/gwkang:/bin/tcsh grep choh /etc/{shadow,passwd} /etc/shadow:choh:kfFrLnGrWOAiA:13319:-1:99999:-1::: /etc/passwd:choh:x:435:100:Changheon Oh:/d/bh9/home/choh:/bin/tcsh vi README.USERS 503 gwkang # Dr Gungwon Kang (KISTI Visiting Scientist, Choptuik) 435 choh # Changheon Oh (Visiting Scholar, PHYS GS, Hanyang U, Choptuk) cat<gwkang gwkang:x:503:600:Gungwon Kang Depew:/d/vnfe1/home/gwkang:/bin/tcsh END vnNewUsers gwkang ssh gwkang@vnfe1 date # OK ssh gwkang@vn35 date # OK # As matt@vnfe1 etc sola; vs # RcKuEmJw4pmGQ vnDistEtc shadow ssh root@vnfe1 'cd ~gwkang; echo gwkang@kisti.re.kr > .forward; chown gwkang.choptuik .forward; ls -al; cat .forward' finger gwkang # DUPLICATED ACCOUNT ON vnfe4 cat<choh choh:x:435:600:Changheon Oh:/d/vnfe1/home/choh:/bin/tcsh END vnNewUsers choh ssh choh@vnfe1 date # OK ssh choh@vn35 date # OK # As matt@vnfe1 etc sola; vs # kfFrLnGrWOAiA vnDistEtc shadow ssh root@vnfe1 'cd ~choh; echo och0423@ihanyang.ac.kr > .forward; chown choh.choptuik .forward; ls -al; cat .forward' finger choh # DUPLICATED ACCOUNT ON vnfe4 ############################################################ Wed Jun 21 13:25:13 PDT 2006 ############################################################ # Liam McW ... reports problem ssh-ing in despite my # proclamation that I had reset his password. 
For some # reason entire 'liam' line was missing from /etc/shadow # Restored and reset per e-mail message liam:$1$lXRGddTM$lo5RUCFp2ogQkUtf3.DUt0:13318:0:99999:7::: # and sent Liam message # TODO: Look for other inconsistencies in # # {bh...,vn...,head}/etc/{shadow,passwd,group} ############################################################ Wed Jun 21 16:49:18 PDT 2006 ############################################################ # In machine room with AJP doing disk transferral from all # of Steve P's machines to vn62 (vn17 no go since most if not # all of drives are SATA), in attempt to recover /etc/fstab # and thus boot-ability etc # Machines affected # # vn63 FIXED # vn64 FIXED # vn65 Jason having problems extracting the drive # vn66 " # vn67 " # Thu Jun 22 09:35:39 PDT 2006 # Thu Jun 22 09:35:39 PDT 2006 # Continuing in machine room with AJP and jumpers courtesy # of Varsity --- thx, Tony # vn65, vn66, vn67 are IDE, use vn17 # Put vn17 on KVM:2 # As root@vn17 shutdown -h now ############################################################ Thu Jun 22 08:30:16 PDT 2006 ############################################################ # Restoring Tom Depew's account # As choptuik@physics sudo pwentry Password: Enter username: tomdepew tomdepew:x:825:307:Tom Depew:/home2/tomdepew:/bin/tcsh tomdepew:S.YxGXwJkZhxk:13019:::::: cat<tomdepew tomdepew:x:825:9000:Tom Depew:/d/vnfe1/home/tomdepew:/bin/tcsh END vnNewUsers tomdepew # As root@vnfe1 grep tomdepew /etc/{shadow,passwd} # /etc/passwd:tomdepew:x:825:9000:Tom Depew:/d/vnfe1/home/tomdepew:/bin/tcsh # As matt@vnfe1 etc cd Archive grep tomdepew shadow* shadow.2006-05-18-1518-24-535584:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: shadow.2006-05-26-1341-20-638641:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: shadow.2006-06-01-0850-13-169885:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: shadow.2006-06-06-0150-00-369706:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: shadow.2006-06-06-0204-40-493864:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: # As 
matt@vnfe1 etc sola get shadow vs vnDistEtc shadow grep tomdepew /etc/{shadow,passwd} /etc/shadow:tomdepew:S.YxGXwJkZhxk:13224:0:99999:7::: /etc/passwd:tomdepew:x:825:9000:Tom Depew:/d/vnfe1/home/tomdepew:/bin/tcsh # As root@vnfe1 cat ~matt/.ssh/id_rsa.pub >> ~tomdepew/.ssh/authorized_keys # As matt@vnfe1 ssh tomdepew@vnfe1 # OK, End of incident Thu Jun 22 08:36:48 PDT 2006 ############################################################ Sun Jun 25 08:58:06 PDT 2006 ############################################################ # Steve P reports problems logging in with "old" passwd # Reset to current one on physics # As choptuik@physics sudo pwentry steve:x:412:307:Steven Plotkin:/home/steve:/bin/bash steve:yEmUKu1Xevoos:11736::::::-1 ############################################################ Tue Jun 27 10:21:12 PDT 2006 ############################################################ # vn36 down, and vnfe1 "accidentally" rebooted several times # need mea culpa on website and motd # As matt@vnfe1 etc ls -lt motd.2006.06* cp motd.2006.06.15 motd.2006.06.26 vi !$ CP motd.2006.06.26 motd vnDistEtc # As matt@bh0 tmotd TUESDAY JUNE 27 1100 vnfe1 WAS ACCIDENTALLY REBOOTED THIS MORNING (SEVERAL TIMES) DUE TO MANAGEMENT ISSUES. In the unlikely event that ANYONE was actually running a task performing I/O to a vnfe1 partition, the output from those jobs should be checked for integrity, with jobs restarted where necessary. Management apologizes for the inconvenience. Also vn36 is off-line, probably due to a power supply issue, and is awaiting attn from AJP (ahem!!) ############################################################ Mon Jul 3 13:11:07 PDT 2006 ############################################################ # Back in machine room ... Another old node has bitten the # dust and vn62 and the other two (?)
Plotkin machines need # to be brought on-line # As root@vnfe1 down vn16 down 5+20:14 vn62 down 268+09:09 vn63 down 32+03:13 vn65 down 32+03:07 vn67 down 24+00:19 #----------------------------------------------------------- # vn16 -> SEE README.CRASH () #----------------------------------------------------------- ############################################################ Thu Jul 6 11:11:23 PDT 2006 ############################################################ # Continuing with the resurrection of Steve P's machines # As root@vnfe1 down vn62 down 271+07:09 vn63 down 35+01:13 vn65 down 35+01:07 vn67 down 26+22:19 # vn62 is 64-bit, and is back on line, but rwhod is broken, user/group/passwd # issue # As root@vn1 rpm -qa | grep rwho rwho-0.17-10mdk rpm -ql rwho-0.17-10mdk /etc/rc.d/init.d/rwhod /usr/bin/ruptime /usr/bin/rwho /usr/sbin/rwhod /usr/share/man/man1/ruptime.1.bz2 /usr/share/man/man1/rwho.1.bz2 /usr/share/man/man8/rwhod.8.bz2 /var/spool/rwho ls -ltd `rpm -ql rwho-0.17-10mdk` | wc 8 72 532 ls -ltd `rpm -ql rwho-0.17-10mdk` | tee /tmp/rpm-rwho drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ -rwxr--r-- 1 root root 933 Jan 5 2005 /etc/rc.d/init.d/rwhod* -rwxr-xr-x 1 root root 6328 Jan 5 2005 /usr/bin/ruptime* -rwxr-xr-x 1 root root 5784 Jan 5 2005 /usr/bin/rwho* -rwxr-xr-x 1 root root 10720 Jan 5 2005 /usr/sbin/rwhod* -rw-r--r-- 1 root root 1576 Jan 5 2005 /usr/share/man/man1/ruptime.1.bz2 -rw-r--r-- 1 root root 2532 Jan 5 2005 /usr/share/man/man8/rwhod.8.bz2 -rw-r--r-- 1 root root 1588 Jan 5 2005 /usr/share/man/man1/rwho.1.bz2 grep rwho /etc/{group,passwd,shadow} /etc/passwd:rwhod:x:9999:9999:rwhod user:/:/bin/false # As root@vn62 rpm -qa | grep rwho rwho-0.17-11mdk rpm -ql rwho-0.17-11mdk /etc/rc.d/init.d/rwhod /usr/bin/ruptime /usr/bin/rwho /usr/sbin/rwhod /usr/share/man/man1/ruptime.1.bz2 /usr/share/man/man1/rwho.1.bz2 /usr/share/man/man8/rwhod.8.bz2 /var/spool/rwho ls -ltd `rpm -ql rwho-0.17-11mdk` | wc 8 72 564 ls -ltd `rpm -ql rwho-0.17-11mdk` | 
tee /tmp/rpm-rwho -rwxr--r-- 1 root root 933 Aug 17 2005 /etc/rc.d/init.d/rwhod* -rwxr-xr-x 1 root root 8768 Aug 17 2005 /usr/bin/ruptime* -rwxr-xr-x 1 root root 7408 Aug 17 2005 /usr/bin/rwho* -rwxr-xr-x 1 root root 16392 Aug 17 2005 /usr/sbin/rwhod* -rw-r--r-- 1 root root 1576 Aug 17 2005 /usr/share/man/man1/ruptime.1.bz2 -rw-r--r-- 1 root root 1588 Aug 17 2005 /usr/share/man/man1/rwho.1.bz2 -rw-r--r-- 1 root root 2532 Aug 17 2005 /usr/share/man/man8/rwhod.8.bz2 drwxr-xr-x 2 daemon daemon 4096 Aug 17 2005 /var/spool/rwho/ grep rwho /etc/{group,passwd,shadow} /etc/passwd:rwhod:x:9999:9999:rwhod user:/:/bin/false # As matt@vnfe1 # TODO: Re-execute this block once the other Plotkin machines are up #--------------------------------------------------------------------- vnallbgCommand 'test -d /var/spool/rwho && chown -R root.root /var/spool/rwho' vnallbgCommand 'service rwhod restart' vnCommand 'ls -ltd /var/spool/rwho' | tee /tmp/vn-var-spool-rwho Rcat /tmp/vn-var-spool-rwho !!ssh matt@vnfe1.physics.ubc.ca cat /tmp/vn-var-spool-rwho #--------------------------------------------------------------------- >>> Executing as root@142.103.237.1 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.2 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.3 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.4 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.5 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.6 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.7 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.8 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.9 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as 
root@142.103.237.10 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.11 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.12 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.13 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.14 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.15 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.16 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.17 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.18 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.19 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.20 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.21 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.22 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.23 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.24 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.25 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.26 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.27 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.28 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.29 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.30 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as 
root@142.103.237.31 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.32 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.33 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.34 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.35 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.36 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.37 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.38 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.39 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.40 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.41 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.42 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.43 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.44 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.45 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.46 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.47 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.48 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.49 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.50 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.51 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as 
root@142.103.237.52 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.53 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.54 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.55 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.56 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.57 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.58 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.59 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.60 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.61 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.62 drwxr-xr-x 2 root root 4096 Aug 17 2005 /var/spool/rwho/ >>> Executing as root@142.103.237.63 >>> Executing as root@142.103.237.64 drwxr-xr-x 2 root root 4096 Feb 22 16:21 /var/spool/rwho/ >>> Executing as root@142.103.237.65 >>> Executing as root@142.103.237.66 drwxr-xr-x 2 root root 4096 Feb 22 16:13 /var/spool/rwho/ >>> Executing as root@142.103.237.67 #--------------------------------------------------------------------- # vnfe1 still thinks vn62 is down, although other machines, such as # vn1 think it's up. Give vnfe1 some time ... TODO # Ensure that vnfe1 is seeing vn62 up ... OK #--------------------------------------------------------------------- # # Other machines # # vn63 # vn65 # vn67 # # are apparently having problems with the network interfaces.
Since # all are connected to a) each other, and b) to the outside world # via 100 Mb, no particular reason to try to get 1000 Mb working, # but need solid documentation re different NICs that are installed # # # vn63 # Top: 10/100 Mb # Bottom: 1000 Mb # # vn65 # Top: 10/100 Mb # Bottom: 1000 Mb # # vn67 # Top: 10/100 Mb # Bottom: 1000 Mb # # Proposed strategy: connect all ether cables to 10/100, insert # Mandriva 2006 boot CD, do a network install[update], and configure # network prior to reboot # With KVM 2 on vn63, insert boot disk, reboot # No dice ... still problem with 10/100 interface post-install, not finding driver. # TODO: A proper job here that doesn't waste another oodle of time or so! # Well, part of the problem is that what I've been configuring as vn63 is actually # vn66! ############################################################ Mon Jul 10 16:38:51 PDT 2006 ############################################################ # Following up on a suggestion by Bill U to look at # /etc/modprobe.conf, hack on that file on vn6[567], removing # e1000 entry with some limited success.
In the end for two # of the machines, added following to # # /etc/rc.d/rc.local # Horrible hack to get network going echo "Sleeping for 5 seconds" >> /var/log/messages 2>&1 sleep 5 modprobe eepro100 >> /var/log/messages 2>&1 service network restart >> /var/log/messages 2>&1 mount -a >> /var/log/messages 2>&1 & # vnfe1:/home is 100% full du -hs /home/shirin df -h /dev/sda6 11G 11G 7.0M 100% /home /dev/sdb1 17G 14G 3.1G 82% /home2 /dev/sdc1 17G 8.9G 7.9G 53% /home3 # As root@vnfe1 mv /home/shirin /home3 ln -s /home3/shirin /home # Updated /etc/motd to reflect filling of vnfe1:/home and the fact that all # nodes are back on-line # Update web page # As matt@vnfe1 foreach f (passwd shadow group hosts.allow hosts.deny hosts) test -f $f.vnfe1 && /bin/rm -f $f.vnfe1 scp root@vnfe1:/etc/$f $f.vnfe1 diff $f $f.vnfe1 # Ensured that all are up to date end vnDistEtc passwd shadow group hosts.allow hosts.deny hosts vnallbgCommand 'service xinetd restart; exportfs -av; umount -a -t nfs -l; mount -a' vnCommand 'df' | tee /tmp/vn-df !!ssh matt@vnfe1.physics.ubc.ca cat /tmp/vn-df >>> Executing as root@142.103.237.1 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3349800 2390236 59% / /dev/hda6 6581928 1547428 5034500 24% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.2 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223628 2516408 57% / /dev/hda6 6581928 537452 6044476 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home 
vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.3 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3224132 2515904 57% / /dev/hda6 6581928 521580 6060348 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.4 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3225164 2514872 57% / /dev/hda6 6581928 526380 6055548 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.5 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223924 2516112 57% / /dev/hda6 6581928 524580 6057348 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.6 Filesystem 
1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3229216 2510820 57% / /dev/hda6 6581928 500464 6081464 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.7 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223884 2516152 57% / /dev/hda6 6581928 795944 5785984 13% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.8 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223916 2516120 57% / /dev/hda6 6581928 550192 6031736 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.9 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223912 2516124 57% / /dev/hda6 6581928 591560 5990368 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% 
/d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.10 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223852 2516184 57% / /dev/hda6 6581928 642324 5939604 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853592 17896840 88% /d/bh0/home >>> Executing as root@142.103.237.11 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223952 2516084 57% / /dev/hda6 6581928 601756 5980172 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853596 17896836 88% /d/bh0/home >>> Executing as root@142.103.237.12 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3202488 2537548 56% / /dev/hda6 6581928 696244 5885684 11% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home >>> Executing as 
root@142.103.237.13 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223900 2516136 57% / /dev/hda6 6581928 721412 5860516 11% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home >>> Executing as root@142.103.237.14 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223800 2516236 57% / /dev/hda6 6581928 596144 5985784 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.15 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223892 2516144 57% / /dev/hda6 6581928 600152 5981776 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.16 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3220952 2519084 57% / /dev/hda6 6581928 561668 6020260 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 
vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.17 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223676 2516360 57% / /dev/hda6 6581928 574640 6007288 9% /scratch >>> Executing as root@142.103.237.18 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3480992 2259044 61% / /dev/hda6 6581928 577108 6004820 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.19 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3643964 2096072 64% / /dev/hda6 6581928 3336884 3245044 51% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.20 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223816 2516220 57% / /dev/hda6 6581928 581088 6000840 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% 
/d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.21 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3224180 2515856 57% / /dev/hda6 6581928 588536 5993392 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.22 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223568 2516468 57% / /dev/hda6 6581928 840336 5741592 13% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.23 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223864 2516172 57% / /dev/hda6 6581928 618004 5963924 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.24 Filesystem 1K-blocks Used Available Use% Mounted on 
/dev/hda1 6047220 3233188 2506848 57% / /dev/hda6 6581928 587352 5994576 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.25 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6040288 3415692 2317760 60% / /dev/hda6 13156256 1304388 11851868 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.26 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223936 2516100 57% / /dev/hda6 6581928 590696 5991232 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.27 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223836 2516200 57% / /dev/hda6 6581928 601148 5980780 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 
9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.28 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3229940 2510096 57% / /dev/hda6 6581928 529976 6051952 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.29 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223524 2516512 57% / /dev/hda6 6581928 631900 5950028 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.30 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223688 2516348 57% / /dev/hda6 6581928 978296 5603632 15% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.31 Filesystem 1K-blocks Used 
Available Use% Mounted on /dev/hda1 6047220 3224956 2515080 57% / /dev/hda6 6581928 586664 5995264 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.32 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223536 2516500 57% / /dev/hda6 6581928 583312 5998616 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.33 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223808 2516228 57% / /dev/hda6 6581928 552384 6029544 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.34 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223724 2516312 57% / /dev/hda6 6581928 787672 5794256 12% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% 
/d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.35 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223976 2516060 57% / /dev/hda6 6581928 482164 6099764 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.36 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223980 2516056 57% / /dev/hda6 6581928 419304 6162624 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.37 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223868 2516168 57% / /dev/hda6 6581928 480920 6101008 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.38 
Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223780 2516256 57% / /dev/hda6 6581928 578000 6003928 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.39 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223684 2516352 57% / /dev/hda6 6581928 734012 5847916 12% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.40 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223836 2516200 57% / /dev/hda6 6581928 431816 6150112 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539528 177368776 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.41 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223708 2516328 57% / /dev/hda6 6581928 460652 6121276 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 
11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.42 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3224384 2515652 57% / /dev/hda6 6581928 479700 6102228 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.43 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3212652 2527384 56% / /dev/hda6 6581928 742928 5839000 12% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.44 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223788 2516248 57% / /dev/hda6 6581928 480960 6100968 8% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing 
as root@142.103.237.45 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223868 2516168 57% / /dev/hda6 6581928 445604 6136324 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.46 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223804 2516232 57% / /dev/hda6 6581928 407348 6174580 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.47 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223880 2516156 57% / /dev/hda6 6581928 702172 5879756 11% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.48 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223588 2516448 57% / /dev/hda6 6581928 448532 6133396 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 
vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539540 177368764 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.49 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223912 2516124 57% / /dev/hda6 6581928 450944 6130984 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.50 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223564 2516472 57% / /dev/hda6 6581928 682576 5899352 11% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.51 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223584 2516452 57% / /dev/hda6 6581928 457060 6124868 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853608 17896824 88% 
/d/bh0/home >>> Executing as root@142.103.237.52 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 4152640 1587396 73% / /dev/hda6 6581928 148420 6433508 3% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.53 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223748 2516288 57% / /dev/hda6 6581928 436368 6145560 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.54 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223884 2516152 57% / /dev/hda6 6581928 321000 6260928 5% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.55 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223916 2516120 57% / /dev/hda6 6581928 414552 6167376 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 
82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.56 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223968 2516068 57% / /dev/hda6 6581928 411460 6170468 7% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.57 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223796 2516240 57% / /dev/hda6 6581928 324372 6257556 5% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.58 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223808 2516228 57% / /dev/hda6 6581928 327316 6254612 5% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 
17896828 88% /d/bh0/home >>> Executing as root@142.103.237.59 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223768 2516268 57% / /dev/hda6 6581928 584612 5997316 9% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.60 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223792 2516244 57% / /dev/hda6 6581928 629184 5952744 10% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.61 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda1 6047220 3223500 2516536 57% / /dev/hda6 6581928 347376 6234552 6% /scratch vnfe1:/home 10958172 8344596 2613576 77% /d/vnfe1/home vnfe1:/home2 17496684 14286112 3210572 82% /d/vnfe1/home2 vnfe1:/home3 17496684 11855340 5641344 68% /d/vnfe2/home vnfe3:/home 10958172 9420480 1537692 86% /d/vnfe3/home vnfe3:/home2 17066300 14655236 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438796 785539548 177368756 82% /d/vnfe4/home bh0:/home 149336320 123853604 17896828 88% /d/bh0/home >>> Executing as root@142.103.237.62 Filesystem 1K-blocks Used Available Use% Mounted on /dev/sda5 238299068 90746148 135447948 41% / vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 
vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home >>> Executing as root@142.103.237.63 Filesystem 1K-blocks Used Available Use% Mounted on /dev/sda5 239334880 98954384 128222908 44% / vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home >>> Executing as root@142.103.237.64 Filesystem 1K-blocks Used Available Use% Mounted on /dev/sda5 239334880 113570048 113607244 50% / vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home boson:/mandrake 115377640 108155352 1361376 99% /mandrake >>> Executing as root@142.103.237.65 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 191264348 7995892 173552748 5% / bh0:/home 149336320 123853600 17896832 88% /d/bh0/home vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home >>> Executing as root@142.103.237.66 Filesystem 1K-blocks 
Used Available Use% Mounted on /dev/hda5 114349864 46598404 61942760 43% / bh0:/home 149336320 123853600 17896832 88% /d/bh0/home vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home vn62:/home 238299072 90746152 135447952 41% /sting >>> Executing as root@142.103.237.67 Filesystem 1K-blocks Used Available Use% Mounted on /dev/hda5 114349864 91753636 16787528 85% / vnfe1:/home 10958176 8344600 2613576 77% /d/vnfe1/home vnfe1:/home2 17496688 14286112 3210576 82% /d/vnfe1/home2 vnfe1:/home3 17496688 11855344 5641344 68% /d/vnfe2/home vnfe3:/home 10958176 9420480 1537696 86% /d/vnfe3/home vnfe3:/home2 17066304 14655240 1522272 91% /d/vnfe3/home2 vnfe4:/home 1014438800 785539552 177368760 82% /d/vnfe4/home bh0:/home 149336320 123853600 17896832 88% /d/bh0/home # As root@vn17 # Fixed up /etc/fstab # TODO: None of Steve's machines file systems are getting mounted ############################################################ Sun Jul 16 20:53:18 PDT 2006 ############################################################ # From the Dorint Novotel Hotel, Berlin Am Tiergarten ofc # Nick Fameli reports problems with access to vnfe1. # No doubt same problem that afflicted Liam, Tom D, ... From fameli@phas.ubc.ca Sun Jul 16 20:53:26 2006 Date: Sat, 15 Jul 2006 17:35:47 -0700 (Pacific Daylight Time) From: Nicola Fameli To: choptuik@phas.ubc.ca Subject: access denied on vn Hi Matt, I just found out that I no longer have access to the vn machines (username nico). I have not used the cluster for a few months, but I doubt that that will be the reason. I also doubt I got the axe because I was storing huge amounts of stuff in there... Could you help please? 
Thanks, Nick # As matt@vnfe1 etc test -f passwd.vnfe1 && /bin/rm -f passwd.vnfe1 test -f shadow.vnfe1 && /bin/rm -f shadow.vnfe1 scp root@vnfe1:/etc/passwd passwd.vnfe1 scp root@vnfe1:/etc/shadow shadow.vnfe1 diff passwd passwd.vnfe1 diff shadow shadow.vnfe1 grep nico passwd nico:x:493:9000:Nicola Fameli:/d/vnfe1/home/nico:/bin/tcsh grep shadow passwd # NADA # As choptuik@phas.ubc.ca sudo pwentry fameli fameli:x:493:307:Nicola Fameli:/home/fameli:/bin/tcsh fameli:Xbkafj3pMADXw:11515::::::-1 # As matt@vnfe1 sola vs # Added nico entry grep nico shadow nico:Xbkafj3pMADXw:11515::::::-1 # OK vnDistEtc shadow vnCommand 'grep nico /etc/{passwd,shadow}' # OK # Send Nick a message From matt@bh0.physics.ubc.ca Sun Jul 16 21:14:02 2006 Date: Sun, 16 Jul 2006 21:13:44 -0700 (PDT) From: Matthew W. Choptuik To: Nicola Fameli Cc: choptuik@phas.ubc.ca Subject: Re: access denied on vn Dear Nick: As Maxwell Smart use to famously say "Sorry about that, Chief!" The master /etc/shadow file for the cluster somehow got corrupted a few months ago, with the result that shadow entries for a lot of users disappeared from the file. I've been (slowly) reconstructing it as individual users contact me re problems with access. With luck, I've now reset your password on the cluster to be the same as it is CURRENTLY on physics.ubc.ca Let me know should you have any further problems with access. Cheers from Berlin, Matt PS: And, yes, you are correct, I NEVER delete/deactivate an account unless I really need to (i.e., this is another area of "safe security" that I blithely disregard) On Sat, 15 Jul 2006, Nicola Fameli wrote: > Hi Matt, > > I just found out that I no longer have access to the vn machines (username > nico). I have not used the cluster for a few months, but I doubt that that > will be the reason. I also doubt I got the axe because I was storing huge > amounts of stuff in there... Could you help please? 
> > Thanks, > Nick > -------------------------------------------------------------------------- Matthew W. Choptuik|Dept. of Phys. & Astronomy, UBC|6224 Agricultural Road Vancouver BC, V6T 1Z1, Canada|Voice: (604) 822-2412|Fax: (604) 822-5324 choptuik@physics.ubc.ca|http://laplace.physics.ubc.ca/Members/matt/ ############################################################ Mon Jul 31 13:40:52 PDT 2006 ############################################################ # No /Public on vnfe1, bh0:/home not getting mounted # As root@vnfe1 mount -a & sleep 10 df # OK ############################################################ Wed Aug 2 10:36:09 PDT 2006 ############################################################ # As root@vnfe3 (TODO: rwhod working on vnfe1 w/o reboot!!) down vn16 down 13:11 # SEE README.CRASH (CRASH_206) # XXX-TODO: DIAGNOSIS Software? ############################################################ Mon Aug 28 08:02:04 PDT 2006 ############################################################ # As root@vnfe3 down vn16 down 21+18:29 # SEE README.CRASH (CRASH_207) # XXX-TODO: DIAGNOSIS MEMORY? 
############################################################ Tue Aug 29 10:27:50 PDT 2006 ############################################################ # Mario Pineda can't log in, another case of missing # /etc/shadow entry # As matt@vnfe1 etc make import diff passwd /etc/passwd # OK vi /etc/shadow pineda:$1$TPfrAGvl$p7G6EPd3T.YbIAl0w.8d4.:13038:0:99999:7::: vnDistEtc shadow ############################################################ Fri Sep 15 10:34:36 PDT 2006 ############################################################ # Dealing with vn16, which has been down (again), for # over 2 weeks, XXX-TODO: Replace memory # See README.CRASH (CRASH_208) ############################################################ # History (README.NEWUSER - Adrian Cortes) # !!ssh matt@bh0 cat /tmp/v From pineda@zoology.ubc.ca Wed Sep 27 10:42:40 2006 Date: Mon, 11 Sep 2006 11:15:01 -0700 (PDT) From: pineda@zoology.ubc.ca To: choptuik@physics.ubc.ca Cc: ykochan@interchange.ubc.ca Subject: Request for vn account Dear Matt, I'd like to request a VN account for one of my students. She will use the cluster in a similar fashion as my previous student (Adrian Cortes, who has finished, i.e. you can probably terminate his account if you wish), low usage and small footprint. I will go over the system use policies with her and supervise her usage. Please CC me on any cluster related correspondence. 1) Full Name: Yu Ki Ophelia Chan 2) Preferred Login Name: ophelia 3) Alternate Login Name (if preferred unavailable/not allowed): ki 4) Group (see notes below): other 5) Preferred e-mail: ykochan@interchange.ubc.ca, pineda@zoology.ubc.ca 6) Contact Phone Number: N/A 7) Preferred Shell (see notes below): bash FILL OUT THE FOLLOWING ITEM *ONLY* IF YOU LISTED "OTHER" AS YOUR GROUP 8) Title (Faculty, post-doc, grad student, undergrad ...) and brief description of anticpated usage of cluster. If post-doc or grad student, please include name of research supervisor. 
Undergrad doing directed studies under the supervision of Mario Pineda-Krch (cosupervised by Sally Otto). Cluster will be used to run individual based models (in C/C++) of coevolution in predator-prey systems. Usage is anticipated to be low (<10 nodes at any given time). # New account for Yu Ki Ophelia Chan # # UBC ZOOL UG (Pineda) # # ophelia ############################################################ cd vn Arc README.USERS vi README.USERS 9081 ophelia # Yu Ki Ophelia Chan nu setenv U ophelia setenv UID 9081 # Sanity check echo "Looking for $UID in /etc/passwd" grep $UID /etc/passwd echo "Done looking for $UID in /etc/passwd" cat<${U} ${U}:x:${UID}:9000:Yu Ki Ophelia Chan:/d/vnfe1/home/${U}:/bin/bash END echo "Invoking vnNewUsers with file ${U}" cat $U vnNewUsers ${U} # As root@vnfe1 setenv U ophelia setenv G other cp ~phys410/.profile ~phys410/.aliases.bash ~${U}; chown ${U}.${G} ~${U}/{.profile,.aliases.bash} ssh ${U}@vnfe1 ssh ${U}@vn35 # OK # E-mailed Mario a request for Ophelia's encrypted password entry From pineda@zoology.ubc.ca Tue Oct 3 09:35:26 2006 Date: Wed, 27 Sep 2006 22:47:47 -0700 (PDT) From: Mario Pineda-Krch To: Matthew W. Choptuik Subject: Re: Request for vn account Matt, her enccrypted passwd is $1$fXv.eY2c$Znraotpn8JfR7ZbMOoJTh0 VN being mothballed is really sad, It was my favourite workhorse. Are any replacement planned? Cheers, Mario On Wed, 27 Sep 2006, Matthew W. 
Choptuik wrote: # As matt@vnfe1 etc sola; vs # $1$fXv.eY2c$Znraotpn8JfR7ZbMOoJTh0 vnDistEtc shadow # As root@vnfe1 setenv U ophelia setenv G other cd ~${U} cat<.forward ykochan@interchange.ubc.ca pineda@zoology.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} vnCommand finger ${U} # OK # As matt@vnfe1 nu cd Blurbs cp cortes ophelia vi !$ scp ophelia matt@bh0:/tmp # As matt@bh0- pine vnallCommand "grep '^ophelia' /etc/shadow;" # TODO-XXX: Duplicate on C2 (Do with Ludo's accounts) # DUPLICATE ACCOUNT ON head # TODO-XXX ############################################################ Tue Oct 3 09:33:48 PDT 2006 ############################################################ # TODO-XXX: Create group for Ludo and accounts # As matt@vnfe1 etc sola get group vi group waerbeke::9300 vnDistEtc group From waerbeke@phas.ubc.ca Tue Oct 3 12:34:39 2006 Date: Tue, 12 Sep 2006 12:34:56 -0700 (PDT) From: Ludovic Van Waerbeke To: choptuik@phas.ubc.ca Subject: usernames Hi Matt, here are the IDs of my users collaborators who need access: UBC: Ludovic Van Waerbeke (waerbeke@phas.ubc.ca) Catherine Heymans (heymans@phas.ubc.ca) postdoc Jonathan Benjamin (jonben@phas.ubc.ca) MSc Sanaz Vafaei (svafaei@phas.ubc.ca) MSc Martha Milkeraitis (martham@phas.ubc.ca) MSc canadian: Henk Hoekstra (hoekstra@uvic.ca) faculty international: Elisabetta Semboloni (sembolon@iap.fr) postdoc Karim Benabed (benabed@iap.fr) faculty Thomas Erben (terben@astro.uni-bonn.de) faculty Patrick Hudelot (phudelot@astro.uni-bonn.de) postdoc Mikael Jarvis (michael@jarvis.net) postdoc Bhuvnesh Jain (bjain@astro.physics.upenn.edu) faculty my other machine is in Brett's lab, i'm backing up the raid5 data partition right now (will be done tonight) in preparation of an OS upgrade. after an accidental manipulation we screwed up the /home partition table, but i think the data are ok. we (jonben and I) intend to use "testdisk" to recover the data on /home, have you heard anything bad about this software? 
cheers ludo -- University of British Columbia, Department of Physics & Astronomy, 6224 Agricultural Road, Vancouver, B.C. V6T 1Z1, Canada. Phone: +1 604 822 5515 Fax: +1 604 822 5324. http://www.physics.ubc.ca/~waerbeke # Start with those having PHAS accounts Ludovic Van Waerbeke (waerbeke@phas.ubc.ca) Catherine Heymans (heymans@phas.ubc.ca) postdoc Jonathan Benjamin (jonben@phas.ubc.ca) MSc Sanaz Vafaei (svafaei@phas.ubc.ca) MSc Martha Milkeraitis (martham@phas.ubc.ca) MSc # As choptuik@physics waerbeke:x:276:307:Ludovic Van Waerbeke:/home/waerbeke:/bin/tcsh waerbeke:S6oiFloGtTefo:12909:::::: heymans:x:783:307:Catherine Heymans:/home/heymans:/bin/tcsh heymans:U2srEKHYZl.hA:12893:::::: jonben:x:343:307:Jonathan Benjamin:/home2/jonben:/bin/tcsh jonben:.WwjRuRj3YwAA:12542:::::: svafaei:x:700:307:Sanaz Vafaei:/home2/svafaei:/bin/tcsh svafaei:ko3k7sSqDetQQ:12660:::::: martham:x:13437:400:Martha Anne Milkeraitis:/home2/martham:/bin/tcsh martham:8.QjfkAXzWpUo:12321:::::: # As matt@vnfe1 foreach u (243 276 783 343 700 13437) grep $u /etc/passwd end foreach u (matt waerbeke heymans jonben svafaei martham) grep $u /etc/passwd end nu cp batch_nov15 batch_vw vi batch_vw # Distribute to # # waerbeke # heymans # jonben # svafaei # martham vnNewUsers waerbeke vnNewUsers heymans vnNewUsers jonben vnNewUsers svafaei vnNewUsers martham foreach u (waerbeke heymans jonben svafaei martham) ssh ${u}@vnfe1 date ssh ${u}@vn35 date end etc sola vs # Hacked encrypted passwords vnDistEtc shadow ############################################################ Mon Oct 16 11:06:08 PDT 2006 ############################################################ # New account for Thomas Erben, Ludo's collaborator From terben@astro.uni-bonn.de Mon Oct 16 11:07:51 2006 Date: Tue, 10 Oct 2006 22:56:19 +0200 (CEST) From: Thomas Erben To: Ludovic Van Waerbeke Cc: matt@bh0.physics.ubc.ca Subject: Re: [Fwd: Re: Access to cluster] Dear Matt, Ludovic asked me to contact you because of an account on his new machine. 
Here is the /etc/shadow entry that you can use for the setup: terben:$1$tuPnOnay$Btp2n0qn.tgmu0IXP6Zhv.:13286:0:99999:7::: With best regards, Thomas Erben # As matt@vnfe1 cd vn vi README.USERS 9301 terben # Thomas Erben nu vi terben terben:x:9301:9300:Thomas Erben:/d/vnfe1/home/terben:/bin/tcsh etc get passwd get shadow diff passwd /etc/passwd # OK nu vnNewUsers terben ssh terben@vnfe1 date ssh terben@vn35 date # OK etc sola vs # $1$tuPnOnay$Btp2n0qn.tgmu0IXP6Zhv. vnDistEtc shadow ############################################################ Thu Nov 2 15:45:30 PST 2006 ############################################################ # New account for Ovidiu Toader (Support for Ludo's machine) cd vn vi README.USERS 9082 ovi # Ovidiu Toader nu vi ovi ovi:x:9082:9000:Ovidiu Toader:/d/vnfe1/home/ovi:/bin/bash etc sola get passwd get shadow diff passwd /etc/passwd # OK nu vnNewUsers ovi ssh ovi@vnfe1 date ssh ovi@vn35 date # OK etc sola vs # $1$vt4s7dDW$TFW5nTKgZWXY9fFoHGvRp. vnDistEtc shadow # As root@vnfe1 setenv U ovi setenv G other cd ~${U} cat<.forward ovi@dms.phas.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} # TODO: Duplicate on C2 ############################################################ Tue Feb 20 15:26:14 PST 2007 ############################################################ # Willem Atsma can't log in # As root@vnfe1 grep watsma /etc/{shadow,passwd} /etc/passwd:watsma:!:9076:9000:Willem Atsma:/d/vnfe3/home2/watsma:/bin/bash # As root@vnfe4 grep watsma /etc/{shadow,passwd} /etc/shadow:watsma:$1$4VvNAUtx$/YN2XF63aHcDLdvYDTeDo.:12506:0:99999:7::: /etc/passwd:watsma:x:9076:9000:Willem Atsma:/d/vnfe4/home/watsma:/bin/bash # As matt@vnfe1 etc sola get shadow vs watsma:$1$4VvNAUtx$/YN2XF63aHcDLdvYDTeDo.:12506:0:99999:7::: vnDistEtc shadow # Send him message to give it a go, and get back to me should there # be any problems. 
############################################################ Thu Mar 1 11:50:48 PST 2007 ############################################################ # New account for Kerstin Wielage (UBC PDF Frigaard) cd vn vi README.USERS 9083 wielage # Kerstin Wielage (UBC MATH PDF Frigaard) grep 9083 /etc/passwd # Good thing I did that since Ingrid Stairs is 9083! grep 9084 /etc/passwd # Good to go 9084 wielage # Kerstin Wielage (UBC MATH PDF Frigaard) nu vi wielage wielage:x:9084:9000:Kerstin Wielage:/d/vnfe1/home/wielage:/bin/tcsh etc sola get passwd get shadow diff passwd /etc/passwd # OK nu vnNewUsers wielage ssh wielage@vnfe1 date ssh wielage@vn35 date # OK # etc sola vs # $1$j9GlFSyc$IaI5pVen92oEQq5znZAtH1 vnDistEtc shadow # As root@vnfe1 setenv U wielage setenv G other cd ~${U} cat<.forward wielage@math.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} # TODO: Duplicate on C2 ############################################################ Thu Mar 1 18:39:12 PST 2007 ############################################################ # New account for Andreas Putz (UBC MATH GS Frigaard) cd vn grep 9085 /etc/passwd vi README.USERS 9085 putza # Andreas Putz (UBC MATH GS Frigaard) nu vi putza putza:x:9085:9000:Andreas Michael Vincent Putz:/d/vnfe1/home/putza:/bin/bash etc sola get passwd get shadow diff passwd /etc/passwd # OK nu vnNewUsers putza ssh putza@vnfe1 date ssh putza@vn35 date # OK # etc sola vs # # $1$3oNNncp/$iwKU8Wz3wWqOh7PGLhVxK/ # vnDistEtc shadow # As root@vnfe1 setenv U putza setenv G other cd ~${U} cat<.forward putza@math.ubc.ca END chown ${U}.${G} .forward; ls -al; finger ${U} # TODO: Duplicate on C2 ############################################################ Mon Apr 23 09:29:00 PDT 2007 ############################################################ # Preparations for decommissioning, need to assemble e-mail # list, send out message, and post on web site # Work in matt@bh0:/home/matt/system/vn-decom vi thelist # As matt@vnfe1 cut -d: -f 1,5 /etc/passwd > 
/tmp/list Rcat /tmp/list ssh matt@vnfe1.physics.ubc.ca cat /tmp/list ############################################################ Tue Jun 19 10:15:51 PDT 2007 ############################################################ # Moving vnfe[13] to Hennings 403 # Have new IPs from Mary Ann Here's your new ip addresses: vnfe1.phas.ubc.ca 142.103.234.52 vnfe3.phas.ubc.ca 142.103.234.54 # Unmount vnfe[13] mounted disks allbgCommand 'umount vnfe1:/home' allbgCommand 'umount vnfe1:/home2' allbgCommand 'umount vnfe1:/home3' allbgCommand 'umount vnfe3:/home' allbgCommand 'umount vnfe3:/home2' # Machines in office and up with new IP numbers # Change hosts on # bh # lnx # vn # As matt@Bh etcbh sola get hosts get hosts.allow vi hosts vi hosts.allow bhDistEtc hosts.allow bhbgCommand 'service xinetd restart' bhbgCommand 'exportfs -av' # As matt@Bh0 etclnx get hosts get hosts.allow vi hosts vi hosts.allow lnxDistEtc hosts.allow lnxbgCommand 'service xinetd restart' lnxbgCommand 'exportfs -av' # As root@vnfe[13] cd /etc scp root@bh0:/etc/hosts . scp root@bh0:/etc/hosts.allow . service nfs restart service xinetd restart exportfs -av # As matt@vnfe1 etc get hosts get hosts.allow # As root@string ssh boson cd /etc cp hosts.allow hosts.allow.O vi hosts.allow # Add 142.103.234.[52,54] to portmap service xinetd restart exportfs -av # As matt@bh0 bhCommand 'mount -a' bhCommand 'df | grep vn' # OK # As root@vnfe[13] service nfs restart mount -a # OK # Tue Aug 5 14:25:33 PDT 2008 # Due to a possible MB failure # vnfe[1-3] were backed up onto bh machines # As root@vnfe3 scp -r /home "bh4:/home/vn_backup/vnfe3/home/." # OK # As root@vnfe3 scp -r /home2 "bh4:/home/vn_backup/vnfe3/home2/." # OK # As root@vnfe1 (physically attached vnfe1, vnfe2 on vnfe3) scp -r /home "bh5:/home/vn_backup/vnfe1/home/." scp -r /home "bh6:/home/vn_backup/vnfe1/home3/." scp -r /home "bh3:/home/vn_backup/vnfe1/home2/." #