Sunday, June 20, 2010

Mount iso file in Solaris

root@pracdb01 # lofiadm -a /u01/app/StorageTek_QFS_4[1].6.iso
/dev/lofi/1
root@pracdb01 #
root@pracdb01 #
root@pracdb01 #

root@pracdb01 # mount -F hsfs /dev/lofi/1 /mnt
root@pracdb01 #
root@pracdb01 # cd /mnt
root@pracdb01 #
root@pracdb01 #
root@pracdb01 # ls -l
total 20
drwxr-xr-x 6 root other 2048 Mar 20 2007 linux1
drwxr-xr-x 5 root other 2048 Mar 19 2007 linux2
drwxr-xr-x 5 root other 2048 Mar 19 2007 sparc
drwxr-xr-x 5 root other 2048 Mar 19 2007 worm
drwxr-xr-x 4 root other 2048 Mar 19 2007 x64

Brocade Switch Zone Configuration

Bkp_Bro1:admin> alicreate "PRACDB01_S1P21", "21:00:00:1b:32:1c:3c:0c"
Bkp_Bro1:admin>
Bkp_Bro1:admin> zonecreate "PRACDB01_s1_9990_1E", "PRACDB01_S1P21; SE9990_1E"

Bkp_Bro1:admin> cfgadd "PROD_TAPE_ZONE1", "PRACDB01_s1_9990_1E"
Bkp_Bro1:admin> cfgsave
You are about to save the Defined zoning configuration. This
action will only save the changes on Defined configuration.
Any changes made on the Effective configuration will not
take effect until it is re-enabled.
Do you want to save Defined zoning configuration only? (yes, y, no, n): [no] y
Updating flash ...

Bkp_Bro1:admin> cfgenable "PROD_TAPE_ZONE1"
You are about to enable a new zoning configuration.
This action will replace the old zoning configuration with the
current configuration selected.
Do you want to enable 'PROD_TAPE_ZONE1' configuration (yes, y, no, n): [no] y
zone config "PROD_TAPE_ZONE1" is in effect
Updating flash ...


2nd Port
============
Bkp_Bro1:admin> alicreate "PRACDB02_S1P22", "21:00:00:1b:32:83:7c:71"

Bkp_Bro1:admin>
Bkp_Bro1:admin> zonecreate "PRACDB02_s1_9990_1E", "PRACDB02_S1P22; SE9990_1E"

Bkp_Bro1:admin> cfgadd "PROD_TAPE_ZONE1", "PRACDB02_s1_9990_1E"
Bkp_Bro1:admin> cfgsave
You are about to save the Defined zoning configuration. This
action will only save the changes on Defined configuration.
Any changes made on the Effective configuration will not
take effect until it is re-enabled.
Do you want to save Defined zoning configuration only? (yes, y, no, n): [no] y
Updating flash ...

Bkp_Bro1:admin> cfgenable "PROD_TAPE_ZONE1"
You are about to enable a new zoning configuration.
This action will replace the old zoning configuration with the
current configuration selected.
Do you want to enable 'PROD_TAPE_ZONE1' configuration (yes, y, no, n): [no] y
zone config "PROD_TAPE_ZONE1" is in effect
Updating flash ...




Switch 134
=============


Bkp_Bro2:admin> alicreate "PRACDB01_S2P21", "21:00:00:1b:32:83:a0:77"
Bkp_Bro2:admin> zonecreate "PRACDB01_S2_9990_2E", "PRACDB01_S2P21; SE9990_2E"
Bkp_Bro2:admin> cfgadd "PROD_TAPE_ZONE2", "PRACDB01_S2_9990_2E"
Bkp_Bro2:admin> cfgsave
You are about to save the Defined zoning configuration. This
action will only save the changes on Defined configuration.
Any changes made on the Effective configuration will not
take effect until it is re-enabled.
Do you want to save Defined zoning configuration only? (yes, y, no, n): [no] y
Updating flash ...
Bkp_Bro2:admin> cfgenable "PROD_TAPE_ZONE2"
You are about to enable a new zoning configuration.
This action will replace the old zoning configuration with the
current configuration selected.
Do you want to enable 'PROD_TAPE_ZONE2' configuration (yes, y, no, n): [no] y
zone config "PROD_TAPE_ZONE2" is in effect
Updating flash ...
Bkp_Bro2:admin>


2nd port
========


Bkp_Bro2:admin> alicreate "PRACDB02_S2P22", "21:00:00:1b:32:84:76:72"
Bkp_Bro2:admin> zonecreate "PRACDB02_S2_9990_2E", "PRACDB02_S2P22; SE9990_2E"
Bkp_Bro2:admin> cfgadd "PROD_TAPE_ZONE2", "PRACDB02_S2_9990_2E"
Bkp_Bro2:admin> cfgsave
You are about to save the Defined zoning configuration. This
action will only save the changes on Defined configuration.
Any changes made on the Effective configuration will not
take effect until it is re-enabled.
Do you want to save Defined zoning configuration only? (yes, y, no, n): [no] y
Updating flash ...
Bkp_Bro2:admin> cfgenable "PROD_TAPE_ZONE2"
You are about to enable a new zoning configuration.
This action will replace the old zoning configuration with the
current configuration selected.
Do you want to enable 'PROD_TAPE_ZONE2' configuration (yes, y, no, n): [no] y
zone config "PROD_TAPE_ZONE2" is in effect
Updating flash ...

procedure to remove the scsi reservation

Please follow the procedure below to remove the SCSI reservations and then reconfigure the storage devices.

# /usr/cluster/lib/sc/scsi -c disfailfast -d /dev/did/rdsk/d5s2
# /usr/cluster/lib/sc/scsi -c release -d /dev/did/rdsk/d5s2
# /usr/cluster/lib/sc/scsi -c scrub -d /dev/did/rdsk/d5s2


# /usr/cluster/lib/sc/scsi -c disfailfast -d /dev/did/rdsk/d6s2
# /usr/cluster/lib/sc/scsi -c release -d /dev/did/rdsk/d6s2
# /usr/cluster/lib/sc/scsi -c scrub -d /dev/did/rdsk/d6s2



# /usr/cluster/lib/sc/scsi -c disfailfast -d /dev/did/rdsk/d1s2
# /usr/cluster/lib/sc/scsi -c release -d /dev/did/rdsk/d1s2
# /usr/cluster/lib/sc/scsi -c scrub -d /dev/did/rdsk/d1s2

Check the reservation keys on the storage devices; there should not be any keys:

# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d5s2

# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d6s2

# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d1s2

Run the
#scgdevs
command to reconfigure the storage devices
Check reservation keys :

# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d5s2
# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d6s2
# /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d1s2

===========
The other thing I found is that one of the paths to the storage is not accessible:

From node "pracdb01 "

cores@fs-cores-brm-sc3b $ more ../disks/*port*
/devices/pci@1,700000/SUNW,qlc@0/fp@0,0:devctl CONNECTED
/devices/pci@1,700000/SUNW,qlc@0,1/fp@0,0:devctl NOT CONNECTED
/devices/pci@3,700000/SUNW,qlc@0/fp@0,0:devctl CONNECTED
/devices/pci@3,700000/SUNW,qlc@0,1/fp@0,0:devctl NOT CONNECTED
cores@fs-cores-brm-sc3b $ cd ../etc/driver_

From Node "pracdb02 "

cores@fs-cores-brm-sc3b $ more *port*
/devices/pci@1,700000/SUNW,qlc@0/fp@0,0:devctl CONNECTED
/devices/pci@1,700000/SUNW,qlc@0,1/fp@0,0:devctl NOT CONNECTED
/devices/pci@3,700000/SUNW,qlc@0/fp@0,0:devctl CONNECTED
/devices/pci@3,700000/SUNW,qlc@0,1/fp@0,0:devctl NOT CONNECTED
cores@fs-cores-brm-sc3b $


Could you please ensure that the storage is accessible from the host from both the paths?


root@pracdb01 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d5s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb01 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d6s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb01 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d1s2
Reservation keys(2):
0x4a6ec47800000001
0x4a6ec47800000002
root@pracdb01 # rsh pracdb02
Last login: Thu Sep 10 17:45:45 from pracdb01
Sun Microsystems Inc. SunOS 5.10 Generic January 2005
Sourcing //.profile-EIS.....
root@pracdb02 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d5s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb02 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d6s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb02 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d1s2
Reservation keys(2):
0x4a6ec47800000001
0x4a6ec47800000002
root@pracdb02 # rsh pracdb03
Last login: Fri Sep 11 11:28:13 from pracdb01
Sun Microsystems Inc. SunOS 5.10 Generic January 2005
Sourcing //.profile-EIS.....
root@pracdb03 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d5s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb03 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d6s2
Reservation keys(3):
0x4a6ec47800000001
0x4a6ec47800000002
0x4a6ec47800000003
root@pracdb03 # /usr/cluster/lib/sc/scsi -c inkeys -d /dev/did/rdsk/d1s2
Reservation keys(2):
0x4a6ec47800000001
0x4a6ec47800000002
root@pracdb03 # /usr/cluster/lib/sc/scsi -c disfailfast -d /dev/did/rdsk/d5s2
do_enfailfast returned 0
root@pracdb03 #

Friday, June 18, 2010

Cluster RG not switching to PSBLD008 - Action Plan

Tata Sky : Action Plan for 11212245
Contents:
1) Problem details :
2) Service impact :
3) Action plan.
Problem details:
Cluster RG not switching to PSBLD008.
Service Impact:
Siebel services outage for 1 hr.
Action Plan
Detach the root mirror in both servers.
PSBLD008:
# metadetach <main-mirror> <sub-mirror>
PSBLD007:
# metadetach <main-mirror> <sub-mirror>
On node PSBLD008:
# init 0
on node PSBLD007:
shutdown oracle server, listener & file system resource using sun cluster.
# metaset -s Siebel-DG -f -d -h PSBLD008
(this may take a minute or so to return)
on node PSBLD008:
ok> boot
(wait for the node to fully join the cluster)
on node PSBLD007:
# metaset -s Siebel-DG -a -h PSBLD008
on node PSBLD008:
# metaset (should now list the metaset Siebel-DG)
test switchover with a:
# scswitch -z -g Siebel-RG -h PSBLD008

RCA for Siebel problem and Best practices recommendation

Tata Sky : RCA for Siebel problem and Best practices recommendation
Problem Summary : Users were unable to contact the Web server of Siebel application
for approximately 10 minutes.
Problem details:
On 4th Jan 07 Webserver was unable to communicate to load balancer for
10 mins. No users were able to access the application. This problem
was resolved automatically. The following message was observed in the
Webserver logs:
[04/Jan/2007:16:00:39] failure (17568): HTTP3068: Error receiving
request from 10.1.19.44 (Connection refused)
Event Time Lines:
Event Date: 4th January 2007
· Event Time: 15:50 to 16:01
· Problem reported: 8th January 2007
· Domains services restored: Siebel
· Diagnosis/Analysis time:
· Reboot time:
· H/W replacement time:
Diagnosis summary :
1) For the problem time window, the only relevant message available is “web server was not
able to reach load balancer.”
2) Per the onsite team, users also did not reach beyond the load balancer.
Since no other data is available to pin point the cause of issue with load balancer or
network or web server , approach was taken to analyze full setup and plan all best
practices to prevent from re-occurrence.
Analysis:
Summary:
There were 4 cases logged related to Siebel setup problem.
10965815 Siebel application server restarted as user sessions were hung
10966494 Web server ping packet drop
10968351 Siebel db performance problems
10979783 Webserver unable to communicate with the application server
These are the highlights of the Analysis:
web server error message indicates that client opened the connection and it is
Tata Sky : RCA for Siebel problem and Best practices recommendation
closed from the client side before the webserver managed to read any data from that
connection. For Web server, Cisco Load balancer (Logical IP) is the immediate client. It
is possible that the connections between the load balancer and the client are also
disconnected.
There is no packet drop in the network between web server and Cisco load balancer while
the testing was carried out after the problem was observed. However there is a message
coming from the load balancer. Please check with Cisco for the message.
Workaround if any:
Suggested Fix and recommendations:
1) Implement the best practices
2) Collect most of the logical data at problem time.
3) Enable debug options in application and network level.
4) Implement the NFS option planned for image files store agreed in phase 1b
architecture layout
Following are the best practice recommendation :
Web server:
1. From the given magnus.conf file of the webserver, KeepAliveTimeout is set to 1200
seconds (20 minutes). Default value is 30 seconds. In the multi-tier architecture, it is best to set
the KeepAliveTimeout as zero.
2.Please modify following entry from /etc/system
Remove the following entry
set segkmem_lpsize=0x400000
add the following entry
set pcie:pcie_aer_ce_mask=0x1
3.Install the latest level EIS CD Patches.
4.Transition to e1000g
Convert from ipge to e1000g by installing patch 123334-02 and running
the script provided
5. Please refer to the following guide which provides the guidance for
the performance and tuning.
http://docs.sun.com/app/docs/doc/817-6249
Tata Sky : RCA for Siebel problem and Best practices recommendation
Action when case of web server is unresponsive:
Please confirm the webserver hang or unresponsive by accessing the static pages or telnet
to the system for http port. If both of them results in time out, we can confirm that
webserver is hung.
Identify the webserver child process as follows:-
1. ps -ef | grep webservd | grep
The highest-numbered PID is the child PID.
With this PID, we need to collect the following details:-
1. Open the terminal1, run prstat against the pid as given below.
# prstat -L -c -p -o prstat.hung
Run this command for 3 minutes, then terminate it with Ctrl+C.
2. Meanwhile, open another terminal2, issue kill -3 command successively for 3 times
with the interval of a minute.
This will create the java thread dump in the errors log file.
3. In the terminal 2, run pstack, pmap, pldd and pfiles against pid.
# pstack pid > pstack.hung
# pmap pid > pmap.hung
# pldd pid > pldd.hung
# pfiles pid > pfiles.hung
4. In the terminal2, run gcore for generating the core file.
# gcore pid-- This will create the core file as core.pid in the present working directory.
5. Run the pkgcore script for collecting the binaries & libraries for root cause analysis.
#pkgcore.sh {case id} {core.pid} {pid}
This will create the packages such as caseid_corefiles.tar.gz & caseid_libraries.tar.gz
6. netstat -na > netstat.hung
Tata Sky : RCA for Siebel problem and Best practices recommendation
Application Server PSBLA001
1.Please modify following entry from /etc/system
Remove the following entry
set segkmem_lpsize=0x400000
set ip:ip_squeue_bind = 0
set ip:ip_squeue_fanout = 1
set ipge:ipge_tx_syncq=1
set ipge:ipge_bcopy_thresh = 512
set ipge:ipge_dvma_thresh = 1
set consistent_coloring=2
Add the following setting
set pcie:pcie_aer_ce_mask=0x1
2.Install the latest level EIS CD Patches.
3. Transition to e1000g
Convert from ipge to e1000g by installing patch 123334-02 and running
the script provided
Action When Application server is hung:
1.PID of the application process
2. truss -o truss.out -ealfd -vall -p "pid of the application"
3. pstack "pid of the application" ==> get it 3 times.
4. snoop -o snoop.out -d
5. ndd /dev/tcp tcp_listen_hash
6. savecore -L
7. guds output
8. prstat -mvL -n 10 1 600
9. iostat -xnz 1 600
10. mpstat 1 600
11. vmstat 1 600
12.lockstat -C -s 50 sleep 30 lockstat -H -s 50 sleep 30
13.lockstat -kIW -s 50 -i 971 sleep 30
Siebel Database server:
1. Modify the following entries From /etc/system
Remove
set ce_reclaim_pending=1
exclude: lofs
Tata Sky : RCA for Siebel problem and Best practices recommendation
add
set ce:ce_bcopy_thresh=97
set ce:ce_dvma_thresh=96
set ce:ce_ring_size=8192
set ce:ce_comp_ring_size=8192
set ce:ce_tx_ring_size=8192
set sq_max_size=100
2.Please note that Dumpdevice : /dev/dsk/c0t0d0s1 is a Submirror of
Swap-Metadevice /dev/md/dsk/d101
Change dumpdevice to Swapmirror : /dev/md/dsk/d101 with: "dumpadm -d
swap"
3. Install the latest level EIS CD Patches which includes cluster
patches.
Action Plan:
Team:
SSE: Vinod SAM: Rajesh
Onsite Team: Prashant Customer engineer/Sysadmin:Shams Khan

PKEND021 Secondary Disk Failure Action Plan

Hi Shams, CASEID:: 11269883

Thank you for the file.

root@PKEND021 # metadb
flags first blk block count
a m p lu 16 8192 /dev/dsk/c0t0d0s7
a p l 8208 8192 /dev/dsk/c0t0d0s7
a p l 16400 8192 /dev/dsk/c0t0d0s7
M p 16 unknown /dev/dsk/c1t0d0s7
M p 8208 unknown /dev/dsk/c1t0d0s7
M p 16400 unknown /dev/dsk/c1t0d0s7

From the output, we can see that all the state replicas on c1t0d0 are bad.

Use the metadb command to delete them. For example:

# metadb -d c1t0d0s7

Once they are deleted, the next step is to replace the disk.

Part II: Replacing failed boot device

1. Gracefully power-down the system with this command:

# init 5

2. Physically replace the failed boot device.



Part III: Repairing state replica database

1. If you use Solaris[TM] Volume Manager on Solaris[TM] 9 or later,
update the state database with the device ID for the new disk using
metadevadm -u c#t#d# .

# metadevadm -u c1t0d0s7

2. Once new boot disk is repartitioned, add new working state replicas
back into the newly replaced disk drive. For example:

# metadb -a -c 3 c1t0d0s7

(The -c # option specifies how many replicas to put into the specified partition)

Part IV: Resyncing the sub-mirrors

1. Run metastat to find all the metadevices that the failed boot device
belongs to. For example:

d0: Mirror
Submirror 0: d1
State: Needs maintenance
Submirror 1: d2
State: Okay
Pass: 1
Read option: roundrobin (default)
Write option: parallel (default)
Size: 205200 blocks

d1: Submirror of d0
State: Needs maintenance
Size: 205200 blocks
Stripe 0:
Device Start Block Dbase State Hot Spare
c0t0d0s0 0 No Okay


d2: Submirror of d0
State: Okay
Size: 205200 blocks
Stripe 0:
Device Start Block Dbase State Hot Spare
c1t2d0s0 0 No Okay


2. Use the metareplace command to re-enable the sub-mirror. For example:

# metareplace -e d0 c0t0d0s0

(Resync operation may take about 15-20 minutes per every gigabyte of
filesystem)

3. Repeat metareplace command to re-enable the other sub-mirrors located
on the same disk:

# metareplace -e d<N> c0t0d0s<slice needing maintenance>

4. Reboot system to have it boot from the newly repaired boot device:

Before rebooting, wait for the resync : all metadevices must be in
'Okay' state, then :

# init 6


I will proceed to order the 146 GB disk on D240 storage.

E20K adding Board from PKEND021 to PSBLD008

bash-2.05$ showplatform -p domains

Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - - Solaris Halted, in OBP
B - PKENA019 Running Solaris
C - PSBLD008 Running Solaris
D - - Powered Off
E - PEAID015 Running Solaris
F - - Powered Off
G - - Powered Off
H - - Powered Off
I - - Powered Off
J - - Powered Off
K - - Powered Off
L - - Powered Off
M - - Powered Off
N - - Powered Off
O - - Powered Off
P - - Powered Off
Q - - Powered Off
R - - Powered Off

bash-2.05$ showboards
Retrieving board information. Please wait.
.......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 - Empty Slot Assigned - A
SB1 On V3CPU Active Passed A
SB2 On V3CPU Active Passed B
SB3 On V3CPU Active Passed C
SB4 On V3CPU Active Passed C
SB5 - Empty Slot Assigned - C
SB6 On V3CPU Active Passed C
SB7 - Empty Slot Available - Isolated
SB8 On V3CPU Active Passed E
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO2 On HPCI+ Active Passed B
IO3 On HPCI+ Active Passed B
IO4 On HPCI+ Active Passed C
IO5 On HPCI+ Active Passed C
IO6 Off HPCI+ Assigned Unknown D
IO7 Off HPCI+ Assigned Unknown D
IO8 On HPCI+ Active Passed E
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated

bash-2.05$ setkeyswitch -d A off
Current virtual key switch position is "ON".
Are you sure you want to change to the "OFF" position (yes/no)? yes
Domain is down.
Waiting on exclusive access to EXB(s): 3FFFF.
Component not present: SB0
Powering off: HPCI+ at IO0
Powering off: EXB at EX0
Powering off: V3CPU at SB1
Powering off: HPCI+ at IO1
Powering off: EXB at EX1
bash-2.05$



Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - - Powered Off
B - PKENA019 Running Solaris
C - PSBLD008 Running Solaris
D - - Powered Off
E - PEAID015 Running Solaris
F - - Powered Off
G - - Powered Off
H - - Powered Off
I - - Powered Off
J - - Powered Off
K - - Powered Off
L - - Powered Off
M - - Powered Off
N - - Powered Off
O - - Powered Off
P - - Powered Off
Q - - Powered Off
R - - Powered Off

bash-2.05$ addboard -d C SB1
assign SB1
.
assign SB1 done
poweron SB1
.............
poweron SB1 done
test SB1 ........... test SB1 done
connect SB1 ........ connect SB1 done
configure SB1
.....
configure SB1 done
.
notify online SUNW_cpu/cpu32
notify online SUNW_cpu/cpu36
notify online SUNW_cpu/cpu33
notify online SUNW_cpu/cpu37
notify online SUNW_cpu/cpu34
notify online SUNW_cpu/cpu38
notify online SUNW_cpu/cpu35
notify online SUNW_cpu/cpu39
..
notify add capacity (8 cpus)
notify add capacity (2097152 pages)
notify add capacity SB1 done


In Domain A, boards SB0 and SB1 are active; SB1 is the main board.



root@PKEND021 # cfgadm -alv |grep permanent
SB1::memory connected configured ok base address 0x2000000000, 16777216 KBytes total, 1996640 KBytes permanent
root@PKEND021 #

From PKEND021 Host
# cfgadm -c unconfigure SB0
# cfgadm -c disconnect SB0

Login to SC 10.1.18.122

root@PBAKB034 # rsh 10.1.18.122
Password:
Last login: Mon Jun 9 12:29:46 from 10.1.18.85
Sun Microsystems Inc. SunOS 5.9 Generic May 2002
Sourcing //.profile-EIS.....
root@T-Sky-20K-2-sc1 # su - sms-svc
T-Sky-20K-2-sc1:sms-svc:1> bash
bash-2.05$ showplatform -p domains

Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - PKEND021 Running Solaris
B - PKENA019 Running Solaris
C - PSBLD008 Running Solaris
D - PSAPA013 Running Solaris
E - PEAID015 Running Solaris


# deleteboard SB0

# addboard -d C SB0

Check the status of the board

# showboards -d C



Removing Board from PSBLD007

root@PSBLD007 #cfgadm -alv |grep -i perm
SB5::memory connected configured ok base address 0x1c000000000, 16777216 KBytes total, 3518488 KBytes permanent


root@PSBLD007 # cfgadm -c unconfigure SB4
root@PSBLD007 # cfgadm -c disconnect SB4


Login to SC 10.1.18.112
su - sms-svc
showplatform -p domains

deleteboard SB4


Activity done on 31st July 2008

root@PSBLD007 # cfgadm -al |grep SB4
SB4 V3CPU connected configured ok
SB4::cpu0 cpu connected configured ok
SB4::cpu1 cpu connected configured ok
SB4::cpu2 cpu connected configured ok
SB4::cpu3 cpu connected configured ok
SB4::memory memory connected configured ok
root@PSBLD007 # Jul 31 14:08:14 PSBLD007 login: ROOT LOGIN /dev/pts/1 FROM PBAKB034

root@PSBLD007 # cfgadm -c unconfigure SB4
Jul 31 14:11:16 PSBLD007 dr: OS unconfigure dr@0:SB4::cpu0
Jul 31 14:11:28 PSBLD007 dr: OS unconfigure dr@0:SB4::cpu1
Jul 31 14:11:49 PSBLD007 dr: OS unconfigure dr@0:SB4::cpu2
Jul 31 14:12:00 PSBLD007 dr: OS unconfigure dr@0:SB4::cpu3
Jul 31 14:12:21 PSBLD007 dr: OS unconfigure dr@0:SB4::memory
you have mail


root@PSBLD007 # cfgadm -al
Ap_Id Type Receptacle Occupant Condition
IO4 HPCI+ connected configured ok
IO4::pci0 io connected configured ok
IO4::pci1 io connected configured ok
IO4::pci2 io connected configured ok
IO4::pci3 io connected configured ok
IO5 HPCI+ connected configured ok
IO5::pci0 io connected configured ok
IO5::pci1 io connected configured ok
IO5::pci2 io connected configured ok
IO5::pci3 io connected configured ok
SB4 V3CPU connected unconfigured ok
SB4::cpu0 cpu connected unconfigured ok
SB4::cpu1 cpu connected unconfigured ok
SB4::cpu2 cpu connected unconfigured ok
SB4::cpu3 cpu connected unconfigured ok
SB4::memory memory connected unconfigured ok


root@PSBLD007 # cfgadm -c disconnect SB4


root@PSBLD007 # cfgadm -al | more
Ap_Id Type Receptacle Occupant Condition
IO4 HPCI+ connected configured ok
IO4::pci0 io connected configured ok
IO4::pci1 io connected configured ok
IO4::pci2 io connected configured ok
IO4::pci3 io connected configured ok
IO5 HPCI+ connected configured ok
IO5::pci0 io connected configured ok
IO5::pci1 io connected configured ok
IO5::pci2 io connected configured ok
IO5::pci3 io connected configured ok
SB4 V3CPU disconnected unconfigured unknown
SB5 V3CPU connected configured ok


bash-2.05$ deleteboard sb4
SB4 successfully unassigned.



Jul 31 14:11:16 PSBLD007 unix: [ID 177789 kern.info] kphysm_delete: mem = 50331648K (0xc00000000)
Jul 31 14:11:16 PSBLD007 unix: [ID 585997 kern.info] kphysm_delete: avail mem = 47505080320
Jul 31 14:11:16 PSBLD007 dr: [ID 427603 kern.notice] OS unconfigure dr@0:SB4::cpu0
Jul 31 14:11:28 PSBLD007 dr: [ID 427603 kern.notice] OS unconfigure dr@0:SB4::cpu1
Jul 31 14:11:49 PSBLD007 dr: [ID 427603 kern.notice] OS unconfigure dr@0:SB4::cpu2
Jul 31 14:12:00 PSBLD007 dr: [ID 427603 kern.notice] OS unconfigure dr@0:SB4::cpu3
Jul 31 14:12:21 PSBLD007 dr: [ID 427603 kern.notice] OS unconfigure dr@0:SB4::memory
Jul 31 14:14:32 PSBLD007 genunix: [ID 408114 kern.info] /memory-controller@80,400000 (mc-us30) offline
Jul 31 14:14:32 PSBLD007 genunix: [ID 408114 kern.info] /memory-controller@81,400000 (mc-us31) offline
Jul 31 14:14:32 PSBLD007 genunix: [ID 408114 kern.info] /memory-controller@82,400000 (mc-us32) offline
Jul 31 14:14:32 PSBLD007 genunix: [ID 408114 kern.info] /memory-controller@83,400000 (mc-us33) offline
Jul 31 14:16:59 PSBLD007 genunix: [ID 408114 kern.info] /address-extender-queue@9e,0 (axq0) offline

PSBLD008 to PKEND021 Board Movement SB0

root@PSBLD008 # cfgadm -alv |grep permanent
SB4::memory connected configured ok base address 0x20000000000, 16777216 KBytes total, 5124096 KBytes permanent
root@PSBLD008 # cfgadm -al | grep SB
SB0 V3CPU connected configured ok
SB0::cpu0 cpu connected configured ok
SB0::cpu1 cpu connected configured ok
SB0::cpu2 cpu connected configured ok
SB0::cpu3 cpu connected configured ok
SB0::memory memory connected configured ok
SB3 V3CPU connected configured ok
SB3::cpu0 cpu connected configured ok
SB3::cpu1 cpu connected configured ok
SB3::cpu2 cpu connected configured ok
SB3::cpu3 cpu connected configured ok
SB3::memory memory connected configured ok
SB4 V3CPU connected configured ok
SB4::cpu0 cpu connected configured ok
SB4::cpu1 cpu connected configured ok
SB4::cpu2 cpu connected configured ok
SB4::cpu3 cpu connected configured ok
SB4::memory memory connected configured ok
SB5 V3CPU connected configured ok
SB5::cpu0 cpu connected configured ok
SB5::cpu1 cpu connected configured ok
SB5::cpu2 cpu connected configured ok
SB5::cpu3 cpu connected configured ok
SB5::memory memory connected configured ok



#cfgadm -c unconfigure SB0---------4:12pm--4:17pm
#cfgadm -c disconnect SB0----------4:18pm--4:19pm

telnet 10.1.18.122
root@T-Sky-20K-2-sc1 # su - sms-svc
T-Sky-20K-2-sc1:sms-svc:1> bash
bash-2.05$ showplatform -p domains

Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - PKEND021 Running Solaris
B - PKENA019 Running Solaris
C - PSBLD008 Running Solaris
D - PSAPA013 Running Solaris
E - PEAID015 Running Solaris


# deleteboard SB0---------------4sec

# addboard -d A SB0-------------4:21pm--4:30pm

Check the status of the board

# showboards -d A

After Adding the boards

1. check the Domains with showboards -d A

bash-2.05$ showboards -d A
Retrieving board information. Please wait.
......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 On V3CPU Active Passed A
SB7 - Empty Slot Available - Isolated
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated


2. Login to Domain A and check the permanent board

root@PKEND021 # cfgadm -alv |grep permanent
SB1::memory connected configured ok base address 0x2000000000, 16777216 KBytes total, 1867632 KBytes permanent


3. Check the cfgadm output

root@PKEND021 # cfgdm -al | more
bash: cfgdm: command not found
root@PKEND021 # cfgadm -al | more
Ap_Id Type Receptacle Occupant Condition
IO0 HPCI+ connected configured ok
IO0::pci0 io connected configured ok
IO0::pci1 io connected configured ok
IO0::pci2 io connected configured ok
IO0::pci3 io connected configured ok
IO1 HPCI+ connected configured ok
IO1::pci0 io connected configured ok
IO1::pci1 io connected configured ok
IO1::pci2 io connected configured ok
IO1::pci3 io connected configured ok
SB0 V3CPU connected configured ok
SB0::cpu0 cpu connected configured ok
SB0::cpu1 cpu connected configured ok
SB0::cpu2 cpu connected configured ok
SB0::cpu3 cpu connected configured ok
SB0::memory memory connected configured ok
SB1 V3CPU connected configured ok
SB1::cpu0 cpu connected configured ok
SB1::cpu1 cpu connected configured ok
SB1::cpu2 cpu connected configured ok
SB1::cpu3 cpu connected configured ok
SB1::memory memory connected configured ok
SB2 V3CPU connected configured ok
SB2::cpu0 cpu connected configured ok
SB2::cpu1 cpu connected configured ok
SB2::cpu2 cpu connected configured ok
SB2::cpu3 cpu connected configured ok
SB2::memory memory connected configured ok
c0 scsi-bus connected configured unknown
c0::dsk/c0t0d0 disk connected configured unknown
c0::dsk/c0t1d0 disk connected configured unknown
c0::dsk/c0t4d0 CD-ROM connected configured unknown
c0::es/ses0 processor connected configured unknown
c0::es/ses1 processor connected configured unknown
c0::rmt/0 tape connected configured unknown
c1 scsi-bus connected configured unknown
c1::dsk/c1t0d0 disk connected configured unknown
c1::dsk/c1t1d0 disk connected configured unknown
c1::dsk/c1t4d0 CD-ROM connected configured unknown
c1::dsk/c1t6d0 disk connected configured unknown
c1::es/ses2 processor connected configured unknown
c1::es/ses3 processor connected configured unknown
c2 scsi-bus connected unconfigured unknown
c3 scsi-bus connected unconfigured unknown
c4 fc-fabric connected configured unknown
c4::50060e80042d0a20 disk connected configured unknown
c5 fc connected unconfigured unknown
c6 fc-fabric connected configured unknown
c6::50060e80042d0a30 disk connected configured unknown
c7 fc connected unconfigured unknown
pci_pci0:e00b1slot1 unknown connected unconfigured unknown
pci_pci5:e01b1slot1 unknown connected unconfigured unknown
pcisch1:e00b1slot0 pci-pci/hp connected configured ok
pcisch2:e00b1slot3 mult/hp connected configured ok
pcisch3:e00b1slot2 pci-pci/hp connected configured ok
pcisch5:e01b1slot0 pci-pci/hp connected configured ok
pcisch6:e01b1slot3 mult/hp connected configured ok
pcisch7:e01b1slot2 pci-pci/hp connected configured ok
root@PKEND021 #


To Remove SB2 Board

#cfgadm -c unconfigure SB2
#

#Check the status of SB2 board by executing a command

root@PKEND021 # cfgadm -al |more
Ap_Id Type Receptacle Occupant Condition
IO0 HPCI+ connected configured ok
IO0::pci0 io connected configured ok
IO0::pci1 io connected configured ok
IO0::pci2 io connected configured ok
IO0::pci3 io connected configured ok
IO1 HPCI+ connected configured ok
IO1::pci0 io connected configured ok
IO1::pci1 io connected configured ok
IO1::pci2 io connected configured ok
IO1::pci3 io connected configured ok
SB0 V3CPU connected configured ok
SB0::cpu0 cpu connected configured ok
SB0::cpu1 cpu connected configured ok
SB0::cpu2 cpu connected configured ok
SB0::cpu3 cpu connected configured ok
SB0::memory memory connected configured ok
SB1 V3CPU connected configured ok
SB1::cpu0 cpu connected configured ok
SB1::cpu1 cpu connected configured ok
SB1::cpu2 cpu connected configured ok
SB1::cpu3 cpu connected configured ok
SB1::memory memory connected configured ok
SB2 V3CPU connected unconfigured ok
SB2::cpu0 cpu connected unconfigured ok
SB2::cpu1 cpu connected unconfigured ok
SB2::cpu2 cpu connected unconfigured ok
SB2::cpu3 cpu connected unconfigured ok
SB2::memory memory connected unconfigured ok
c0 scsi-bus connected configured unknown
c0::dsk/c0t0d0 disk connected configured unknown
c0::dsk/c0t1d0 disk connected configured unknown
c0::dsk/c0t4d0 CD-ROM connected configured unknown
c0::es/ses0 processor connected configured unknown
c0::es/ses1 processor connected configured unknown
c0::rmt/0 tape connected configured unknown
c1 scsi-bus connected configured unknown
c1::dsk/c1t0d0 disk connected configured unknown
c1::dsk/c1t1d0 disk connected configured unknown
c1::dsk/c1t4d0 CD-ROM connected configured unknown
c1::dsk/c1t6d0 disk connected configured unknown
c1::es/ses2 processor connected configured unknown
c1::es/ses3 processor connected configured unknown
c2 scsi-bus connected unconfigured unknown
c3 scsi-bus connected unconfigured unknown
c4 fc-fabric connected configured unknown
c4::50060e80042d0a20 disk connected configured unknown
c5 fc connected unconfigured unknown
c6 fc-fabric connected configured unknown
c6::50060e80042d0a30 disk connected configured unknown
c7 fc connected unconfigured unknown
pci_pci0:e00b1slot1 unknown connected unconfigured unknown
pci_pci5:e01b1slot1 unknown connected unconfigured unknown
pcisch1:e00b1slot0 pci-pci/hp connected configured ok
pcisch2:e00b1slot3 mult/hp connected configured ok
pcisch3:e00b1slot2 pci-pci/hp connected configured ok
pcisch5:e01b1slot0 pci-pci/hp connected configured ok
pcisch6:e01b1slot3 mult/hp connected configured ok
pcisch7:e01b1slot2 pci-pci/hp connected configured ok
root@PKEND021 #

After unconfiguring the board, disconnect it from the host by executing the command

root@PKEND021 # cfgadm -c disconnect SB2

Again check the status by executing a command

root@PKEND021 # cfgadm -al | more
Ap_Id Type Receptacle Occupant Condition
IO0 HPCI+ connected configured ok
IO0::pci0 io connected configured ok
IO0::pci1 io connected configured ok
IO0::pci2 io connected configured ok
IO0::pci3 io connected configured ok
IO1 HPCI+ connected configured ok
IO1::pci0 io connected configured ok
IO1::pci1 io connected configured ok
IO1::pci2 io connected configured ok
IO1::pci3 io connected configured ok
SB0 V3CPU connected configured ok
SB0::cpu0 cpu connected configured ok
SB0::cpu1 cpu connected configured ok
SB0::cpu2 cpu connected configured ok
SB0::cpu3 cpu connected configured ok
SB0::memory memory connected configured ok
SB1 V3CPU connected configured ok
SB1::cpu0 cpu connected configured ok
SB1::cpu1 cpu connected configured ok
SB1::cpu2 cpu connected configured ok
SB1::cpu3 cpu connected configured ok
SB1::memory memory connected configured ok
SB2 V3CPU disconnected unconfigured unknown
c0 scsi-bus connected configured unknown
c0::dsk/c0t0d0 disk connected configured unknown
c0::dsk/c0t1d0 disk connected configured unknown
c0::dsk/c0t4d0 CD-ROM connected configured unknown
c0::es/ses0 processor connected configured unknown
c0::es/ses1 processor connected configured unknown
c0::rmt/0 tape connected configured unknown
c1 scsi-bus connected configured unknown
c1::dsk/c1t0d0 disk connected configured unknown
c1::dsk/c1t1d0 disk connected configured unknown
c1::dsk/c1t4d0 CD-ROM connected configured unknown
c1::dsk/c1t6d0 disk connected configured unknown
c1::es/ses2 processor connected configured unknown
c1::es/ses3 processor connected configured unknown
c2 scsi-bus connected unconfigured unknown
c3 scsi-bus connected unconfigured unknown
c4 fc-fabric connected configured unknown
c4::50060e80042d0a20 disk connected configured unknown
c5 fc connected unconfigured unknown
c6 fc-fabric connected configured unknown
c6::50060e80042d0a30 disk connected configured unknown
c7 fc connected unconfigured unknown
pci_pci0:e00b1slot1 unknown connected unconfigured unknown
pci_pci5:e01b1slot1 unknown connected unconfigured unknown
pcisch1:e00b1slot0 pci-pci/hp connected configured ok
pcisch2:e00b1slot3 mult/hp connected configured ok
pcisch3:e00b1slot2 pci-pci/hp connected configured ok
pcisch5:e01b1slot0 pci-pci/hp connected configured ok
pcisch6:e01b1slot3 mult/hp connected configured ok
pcisch7:e01b1slot2 pci-pci/hp connected configured ok



Then Login to System Controller and check showboards -d A

bash-2.05$ showboards -d A
Retrieving board information. Please wait.
......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 Off V3CPU Assigned Unknown A
SB7 - Empty Slot Available - Isolated
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated

check for all the domains.

bash-2.05$ showboards
Retrieving board information. Please wait.
.........
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 Off V3CPU Assigned Unknown A
SB3 On V3CPU Active Passed C
SB4 On V3CPU Active Passed C
SB5 On V3CPU Active Passed C
SB6 On V3CPU Active Passed D
SB7 - Empty Slot Available - Isolated
SB8 On V3CPU Active Passed E
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO2 Off HPCI+ Assigned Unknown B
IO3 Off HPCI+ Assigned Unknown B
IO4 On HPCI+ Active Passed C
IO5 On HPCI+ Active Passed C
IO6 On HPCI+ Active Passed D
IO7 On HPCI+ Active Passed D
IO8 On HPCI+ Active Passed E
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated

Now delete the Board which is showing unknown status

bash-2.05$ deleteboard SB2
SB2 successfully unassigned.


Check the board status now by executing the command

bash-2.05$ showboards
Retrieving board information. Please wait.
.......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 Off V3CPU Available Unknown Isolated
SB3 On V3CPU Active Passed C
SB4 On V3CPU Active Passed C
SB5 On V3CPU Active Passed C
SB6 On V3CPU Active Passed D
SB7 - Empty Slot Available - Isolated
SB8 On V3CPU Active Passed E
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO2 Off HPCI+ Assigned Unknown B
IO3 Off HPCI+ Assigned Unknown B
IO4 On HPCI+ Active Passed C
IO5 On HPCI+ Active Passed C
IO6 On HPCI+ Active Passed D
IO7 On HPCI+ Active Passed D
IO8 On HPCI+ Active Passed E
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated


bash-2.05$ showplatform

Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - PKEND021 Running Solaris
B - - Powered Off
C - PSBLD008 Running Solaris
D - PSAPA013 Running Solaris
E - PEAID015 Running Solaris
F - - Powered Off
G - - Powered Off
H - - Powered Off
I - - Powered Off
J - - Powered Off
K - - Powered Off
L - - Powered Off
M - - Powered Off
N - - Powered Off
O - - Powered Off
P - - Powered Off
Q - - Powered Off
R - - Powered Off


bash-2.05$ addboard -d B SB2

Domain: B is not running. You can only "configure" a component into
a running domain. Would you like to "assign" the component(s) to
domain B instead (yes/no)? yes
SB2 assigned to domain: B
bash-2.05$

Now power on the board by executing the command

bash-2.05$ setkeyswitch -d b on

Powering on: CSB at CS1
Already powered on: CSB at CS1
Powering on: CSB at CS0
Already powered on: CSB at CS0
Powering on: EXB at EX2
Already powered on: EXB at EX2
Powering on: HPCI+ at IO2
Powering on: V3CPU at SB2
Powering on: EXB at EX3
Already powered on: EXB at EX3
Powering on: HPCI+ at IO3

Significant contents of .postrc (platform)
/etc/opt/SUNWSMS/SMS1.5/config/platform/.postrc:
# ident "@(#)postrc 1.1 01/04/02 SMI"

Reading domain blacklist file /etc/opt/SUNWSMS/config/B/blacklist ...
# ident "@(#)blacklist 1.1 01/04/02 SMI"
Reading platform blacklist file /etc/opt/SUNWSMS/config/platform/blacklist ...
# ident "@(#)blacklist 1.1 01/04/02 SMI"
SEEPROM probe took 0 seconds.
Reading Component Health Status (CHS) information ...
stage lport_reset: Assert reset to IOC ports in -Q mode...
stage_lport_reset(): Not -Q mode; Skipping Stage lport_reset
stage bus_probe: Check in-use bus configurations...
stage asic_probe: ASIC probe and JTAG/CBus integrity test...
stage brd_rev_eval: Board Revision Evaluation and Compliance...
stage cpu_probe: CPU Module probe...
stage cdc_probe: CDC DIMM probe...
stage mem_probe: Memory dimm probe...
stage adapter_probe: I/O adapter probe...
stage cp_shorts: Centerplane Shorts...
stage lbist: Logic BIST...
stage ibist: Interconnect BIST...
stage field_ict: Field Interconnect Tests...
stage mbist1: Internal memory BIST...
stage mbist2: External memory BIST...
stage domain_sync: Domain sync test...
stage cbus_bbsram: Console Bus test of bootbus sram...
stage sc_interrupt: DARB to SC interrupt...
stage cdc_clear: CDC DIMM clear...
stage cpu_lpost: Test all L1 CPU boards...
Performing ASIC config with bus config a/d/r = 333...
Slot0 in domain: 00004
Slot1 in domain: 0000C
EXBs in use: 001FB
sgcpu.flash file: Version 5.19.6 Build 1.0 I/F 12 is newest supported
stage nmb_cpu_lpost: Non-Mem Board Proc tests...
Performing ASIC config with bus config a/d/r = 333...
Slot0 in domain: 00004
Slot1 in domain: 0000C
EXBs in use: 001FB
stage_cpu_lpost(): No NMB Boards in config. Skipping Stage nmb_cpu_lpost.
Acquiring licenses for all good processors...
stage wib_lpost: Wildcat interface board tests...
stage_wib_lpost(): No good Wcis; Skipping Stage wib_lpost
stage pci_lpost: Test all L1 I/O boards...
Performing ASIC config with bus config a/d/r = 333...
Slot0 in domain: 00004
Slot1 in domain: 0000C
EXBs in use: 001FB
pcilpost.elf Version 5.19.6 Build 1.0 I/F 12 is newest supported
NOTE: Mixed Minor numbers: 2
All LPOSTs in a domain should use the same version.
Table of version comparisons:
Fprom SB02/F0: 5.19.3 Build 1.0 I/F 12 vs pcilpost.elf: 5.19.6 Build 1.0 I/F 12
Fprom SB02/F1: 5.19.3 Build 1.0 I/F 12 vs pcilpost.elf: 5.19.6 Build 1.0 I/F 12
stage exp_lpost: Domain-level board and system tests...
explpost.elf Version 5.19.6 Build 1.0 I/F 12 is newest supported
NOTE: Mixed Minor numbers: 2
All LPOSTs in a domain should use the same version.
Table of version comparisons:
Fprom SB02/F0: 5.19.3 Build 1.0 I/F 12 vs explpost.elf: 5.19.6 Build 1.0 I/F 12
Fprom SB02/F1: 5.19.3 Build 1.0 I/F 12 vs explpost.elf: 5.19.6 Build 1.0 I/F 12
stage cpu_lpost_II: CPU L1 domain/system tests...
sgcpu.flash file: Version 5.19.6 Build 1.0 I/F 12 is newest supported
stage pci_lpost_Q: Init all L1 I/O boards under -Q...
stage cpu_lpost_II_Q: CPU L1 domain/system init under -Q...
stage final_config: Final configuration...
Creating CPU SRAM handoff structures...
Creating GDCD IOSRAM handoff structures in Slot IO2...
Writing domain information to PCD...

Key to resource status value codes:
?=Unknown p=Present c=Crunched _=Undefined m=Missing
i=Misconfig o=FailedOBP f=Failed b=Blacklisted r=Redlisted
x=NotInDomain u=G,unconfig P=Passed ==G,lockstep l=NoLicense
e=EmptyCasstt

CPU_Brds: PortCore
3 2 1 0 Mem P/B: 3/1 3/0 2/1 2/0 1/1 1/0 0/1 0/0
Slot Gen 10101010 /L: 10 10 10 10 10 10 10 10 CDC
SB02: P PPPPPPPP PP PP PP PP PP PP PP PP P

I/O_Brds: IOC P1/Bus/Adapt IOC P0/Bus/Adapt
Slot Gen Type P1 B1/10 B0/10 P0 B1/eb10 B0/10 (e=ENet, b=BBC)
IO02: P hsPCI+ P p _p p _p P p PP_e p _p
IO03: P hsPCI+ P p _p p _p P p PP_e p _p

Configured in 333 with 4 procs, 16.000 GBytes, 6 IO adapters.
Interconnect frequency is 149.978 MHz, Measured.
Golden sram is on Slot IO2.
POST (level=16, verbose=20) execution time 5:53


Now check the status of the Board

bash-2.05$ showboards
Retrieving board information. Please wait.
...........
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 On V3CPU Active Passed B
SB3 On V3CPU Active Passed C
SB4 On V3CPU Active Passed C
SB5 On V3CPU Active Passed C
SB6 On V3CPU Active Passed D
SB7 - Empty Slot Available - Isolated
SB8 On V3CPU Active Passed E
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO2 On HPCI+ Active Passed B
IO3 On HPCI+ Active Passed B
IO4 On HPCI+ Active Passed C
IO5 On HPCI+ Active Passed C
IO6 On HPCI+ Active Passed D
IO7 On HPCI+ Active Passed D
IO8 On HPCI+ Active Passed E
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated



bash-2.05$ showplatform

PLATFORM:
=========
Platform Type: Sun Fire E20K

CSN:
====
Chassis Serial Number: 0609AK20BA

COD:
====
Chassis HostID: 5014936D87943
Proc RTUs installed: 0
PROC Headroom Quantity: 0
Proc RTUs reserved for domain A: 0
Proc RTUs reserved for domain B: 0
Proc RTUs reserved for domain C: 0
Proc RTUs reserved for domain D: 0
Proc RTUs reserved for domain E: 0
Proc RTUs reserved for domain F: 0
Proc RTUs reserved for domain G: 0
Proc RTUs reserved for domain H: 0
Proc RTUs reserved for domain I: 0
Proc RTUs reserved for domain J: 0
Proc RTUs reserved for domain K: 0
Proc RTUs reserved for domain L: 0
Proc RTUs reserved for domain M: 0
Proc RTUs reserved for domain N: 0
Proc RTUs reserved for domain O: 0
Proc RTUs reserved for domain P: 0
Proc RTUs reserved for domain Q: 0
Proc RTUs reserved for domain R: 0


Available Component List for Domains:
=====================================
Available Component List for domain A:
No System boards
No IO boards

Available Component List for domain B:
No System boards
No IO boards

Available Component List for domain C:
No System boards
No IO boards

Available Component List for domain D:
No System boards
No IO boards

Available Component List for domain E:
No System boards
No IO boards

Available Component List for domain F:
No System boards
No IO boards

Available Component List for domain G:
No System boards
No IO boards

Available Component List for domain H:
No System boards
No IO boards

Available Component List for domain I:
No System boards
No IO boards

Available Component List for domain J:
No System boards
No IO boards

Available Component List for domain K:
No System boards
No IO boards

Available Component List for domain L:
No System boards
No IO boards

Available Component List for domain M:
No System boards
No IO boards

Available Component List for domain N:
No System boards
No IO boards

Available Component List for domain O:
No System boards
No IO boards

Available Component List for domain P:
No System boards
No IO boards

Available Component List for domain Q:
No System boards
No IO boards

Available Component List for domain R:
No System boards
No IO boards


Domain Ethernet Addresses:
==========================
Domain ID Domain Tag Ethernet Address
A - 0:0:be:a9:fc:24
B - 0:0:be:a9:fc:25
C - 0:0:be:a9:fc:26
D - 0:0:be:a9:fc:27
E - 0:0:be:a9:fc:28
F - 0:0:be:a9:fc:29
G - 0:0:be:a9:fc:2a
H - 0:0:be:a9:fc:2b
I - 0:0:be:a9:fc:2c
J - 0:0:be:a9:fc:2d
K - 0:0:be:a9:fc:2e
L - 0:0:be:a9:fc:2f
M - 0:0:be:a9:fc:30
N - 0:0:be:a9:fc:31
O - 0:0:be:a9:fc:32
P - 0:0:be:a9:fc:33
Q - 0:0:be:a9:fc:34
R - 0:0:be:a9:fc:35


Domain configurations:
======================
Domain ID Domain Tag Solaris Nodename Domain Status
A - PKEND021 Running Solaris
B - - Running OBP
C - PSBLD008 Running Solaris
D - PSAPA013 Running Solaris
E - PEAID015 Running Solaris
F - - Powered Off
G - - Powered Off
H - - Powered Off
I - - Powered Off
J - - Powered Off
K - - Powered Off
L - - Powered Off
M - - Powered Off
N - - Powered Off
O - - Powered Off
P - - Powered Off
Q - - Powered Off
R - - Powered Off

bash-2.05$

Now go to the console of domain B. You will get an ok prompt; execute the boot command.

bash-2.05$ console -d b
Trying to connect...
Connected to Domain Server.
Your console is in exclusive mode now.

{40} ok boot
Boot device: /pci@5d,600000/pci@1/scsi@2/disk@0,0:a File and args:
SunOS Release 5.9 Version Generic_118558-21 64-bit
Copyright 1983-2003 Sun Microsystems, Inc. All rights reserved.
Use is subject to license terms.
NOTICE: Kernel Cage Splitting is ENABLED
WARNING: forceload of misc/md_trans failed
WARNING: forceload of misc/md_raid failed
WARNING: forceload of misc/md_hotspares failed
WARNING: forceload of misc/md_sp failed
WARNING: ce4: fault detected external to device; service degraded
WARNING: ce4: xcvr addr:0x01 - link down
NOTICE: ce4: fault cleared external to device; service available
NOTICE: ce4: xcvr addr:0x01 - link up 1000 Mbps full duplex
configuring IPv4 interfaces: ce0 ce6 eri0.
Hostname: PKENA019
WARNING: ce10: fault detected external to device; service degraded
WARNING: ce10: xcvr addr:0x01 - link down
NOTICE: ce10: fault cleared external to device; service available
NOTICE: ce10: xcvr addr:0x01 - link up 1000 Mbps full duplex
WARNING: ce10: fault detected external to device; service degraded
WARNING: ce10: xcvr addr:0x01 - link down
NOTICE: ce10: fault cleared external to device; service available
NOTICE: ce10: xcvr addr:0x01 - link up 1000 Mbps full duplex
ID[luxadm.create_fabric_device.2316] configuration failed for line (/devices/pci@5d,700000/SUNW,qlc@1,1/fp@0,0:fc::100000e002233b2b) in file: /etc/cfg/fp/fabric_WWN_map. I/O error
Could not open /dev/rmt/2l to verify device id.
No such device or address
Could not open /dev/rmt/1l to verify device id.
No such device or address
Booting as part of a cluster
NOTICE: CMM: Node PKENA018 (nodeid = 1) with votecount = 1 added.
NOTICE: CMM: Node PKENA019 (nodeid = 2) with votecount = 1 added.
NOTICE: CMM: Quorum device 2 (/dev/did/rdsk/d9s2) added; votecount = 1, bitmask of nodes with configured paths = 0x3.
NOTICE: clcomm: Adapter ce10 constructed
NOTICE: clcomm: Path PKENA019:ce10 - PKENA018:ce10 being constructed
NOTICE: clcomm: Adapter ce4 constructed
NOTICE: clcomm: Path PKENA019:ce4 - PKENA018:ce4 being constructed
NOTICE: CMM: Node PKENA019: attempting to join cluster.
NOTICE: clcomm: Path PKENA019:ce10 - PKENA018:ce10 being initiated
NOTICE: clcomm: Path PKENA019:ce4 - PKENA018:ce4 being initiated
NOTICE: CMM: Node PKENA018 (nodeid: 1, incarnation #: 1211187859) has become reachable.
NOTICE: clcomm: Path PKENA019:ce10 - PKENA018:ce10 online
NOTICE: clcomm: Path PKENA019:ce4 - PKENA018:ce4 online
NOTICE: CMM: Cluster has reached quorum.
NOTICE: CMM: Node PKENA018 (nodeid = 1) is up; new incarnation number = 1211187859.
NOTICE: CMM: Node PKENA019 (nodeid = 2) is up; new incarnation number = 1211352533.
NOTICE: CMM: Cluster members: PKENA018 PKENA019.
NOTICE: CMM: node reconfiguration #16 completed.
NOTICE: CMM: Node PKENA019: joined cluster.
ip: joining multicasts failed (18) on clprivnet0 - will use link layer broadcasts for multicast
Could not open /dev/rmt/2l to verify device id.
No such device or address
Could not open /dev/rmt/1l to verify device id.
No such device or address
The system is coming up. Please wait.
checking ufs filesystems
/dev/rdsk/c0t0d0s3: is logging.
Starting DCE daemons in rc.dce
/opt/OV/dce/bin/dced -b
Finished DCE daemons in rc.dce
starting rpc services: rpcbind done.
Setting netmask of ce0 to 255.255.255.0
Setting netmask of ce0:1 to 255.255.255.0
Setting netmask of ce6 to 255.255.255.0
Setting netmask of eri0 to 255.255.255.0
Setting netmask of ce10 to 255.255.255.128
Setting netmask of ce4 to 255.255.255.128
Setting netmask of clprivnet0 to 255.255.255.0
Setting default IPv4 interface for multicast: add net 224.0/4: gateway PKENA019
syslog service starting.
obtaining access to all attached disks
May 21 12:19:34 PKENA019 sckmd: PF_KEY error: type=DELETE, errno=3, diagnostic code=0
Starting Sun Java(TM) Web Console Version 2.2...
See /var/log/webconsole/console_debug_log for server logging information
starting NetWorker daemons:
nsrexecd
share_nfs: /s1/kenan/htm: No such file or directory
volume management starting.
Using /var/run
Storing undefined to /var/run/psn
The system is ready.

PKENA019 console login:
========================================================

Board Movement

Login to 10.1.18.122(SC) and do su - sms-user.
Run the showboards and showplatform commands to find out which board is used by PKENA019; the output contains the host name along with the board number.

Switching the cluster

Login to 10.1.18.37(PKENA019)
scswitch -z -g Kenact-RG -h PKENA018
After switchover give
init 0 on PKENA019.



Moving the board.

Login to 10.1.18.122(SC) which contains the board of PKENA019.
su - sms-user
sc:sms-user:> showplatform -p domains
sc:sms-user:> showboards
sc:sms-user:> setkeyswitch -d off
Current virtual key switch position is on.Change it to off?
sc:sms-user:> deleteboard -c unassign SB
SB unassigned.

Pull out the board and put it in other E20K domain.

Login to 10.1.18.112(SC) which contains the board of PKENA018
su - sms-user

sc:sms-user:> showplatform -p domains
sc:sms-user:> addboard -d -c assign SB
sc:sms-user:> showboards -d

Log in to PKENA018 and verify that 16 CPUs are available.

E20K Board Adding to PKENA019 Host

Login to PKENA019

1. Check the status of the cluster node by executing a command
# scstat -g
# scswitch -n -M -j Kenact-lh-rs
#scswitch -n -M -j Kenact-hasp-rs
#scswitch -n -M -j ebs-rs
#scswitch -n -M -j Kenadm-DB-rs
#scswitch -n -M -j Kencat-DB-rs
#scswitch -n -M -j Kenlsnr-res

All of the above commands disable monitoring of the cluster resources.


2. root@PKENA019 # cfgadm -alv |grep permanent
SB2::memory connected configured ok base address 0x22000000000, 16777216 KBytes total, 4734056 KBytes permanent

root@PKENA019 # init 0

Now PKENA019 will be in OK prompt.


Login to SC (10.1.18.122)

su to sms-svc

T-Sky-20K-2-sc1:sms-svc:2> showboards
Retrieving board information. Please wait.
......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A---------------PKEND021
SB1 On V3CPU Active Passed A---------------PKEND021
SB2 On V3CPU Active Passed B---------------PKENA019
SB3 On V3CPU Active Passed C---------------PSBLD008
SB4 On V3CPU Active Passed C---------------PSBLD008
SB5 On V3CPU Active Passed C---------------PSBLD008
SB6 On V3CPU Active Passed D---------------PSAPA013
SB7 - Empty Slot Available - Isolated
SB8 On V3CPU Active Passed E---------------PEAID015


IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO2 On HPCI+ Active Passed B
IO3 On HPCI+ Active Passed B
IO4 On HPCI+ Active Passed C
IO5 On HPCI+ Active Passed C
IO6 On HPCI+ Active Passed D
IO7 On HPCI+ Active Passed D
IO8 On HPCI+ Active Passed E

bash-2.05$ showkeyswitch -d b
Virtual key switch position: ON

Turn off the keyswitch of domain B .

bash-2.05$ setkeyswitch -d b off
Current virtual key switch position is "ON".
Are you sure you want to change to the "OFF" position (yes/no)? yes

Domain is down.
Waiting on exclusive access to EXB(s): 3FFFF.
Powering off: V3CPU at SB2
Powering off: HPCI+ at IO2
Powering off: EXB at EX2
Powering off: HPCI+ at IO3
bash-2.05$

T-Sky-20K-2-sc1:sms-svc:>showplatform -p domains =======Check the status

T-Sky-20K-2-sc1:sms-svc:>deleteboard SB2
SB2 successfully unassigned.
bash-2.05$

Add the board to Domain A
T-Sky-20K-2-sc1:sms-svc:>addboard -d A SB2
assign SB2
.
assign SB2 done
poweron SB2
.............
poweron SB2 done
test SB2 .......... test SB2 done
connect SB2 ...... connect SB2 done
configure SB2
....
configure SB2 done
.
notify online SUNW_cpu/cpu64
notify online SUNW_cpu/cpu68
notify online SUNW_cpu/cpu65
notify online SUNW_cpu/cpu69
notify online SUNW_cpu/cpu66
notify online SUNW_cpu/cpu70
notify online SUNW_cpu/cpu67
notify online SUNW_cpu/cpu71
.
notify add capacity (8 cpus)
notify add capacity (2097152 pages)
notify add capacity SB2 done


T-Sky-20K-2-sc1:sms-svc:>showboards -d A


bash-2.05$ showboards -d A
Retrieving board information. Please wait.
......
Location Pwr Type of Board Board Status Test Status Domain
-------- --- ------------- ------------ ----------- ------
SB0 On V3CPU Active Passed A
SB1 On V3CPU Active Passed A
SB2 On V3CPU Active Passed A
SB7 - Empty Slot Available - Isolated
SB9 - Empty Slot Available - Isolated
SB10 - Empty Slot Available - Isolated
SB11 - Empty Slot Available - Isolated
SB12 - Empty Slot Available - Isolated
SB13 - Empty Slot Available - Isolated
SB14 - Empty Slot Available - Isolated
SB15 - Empty Slot Available - Isolated
SB16 - Empty Slot Available - Isolated
SB17 - Empty Slot Available - Isolated
IO0 On HPCI+ Active Passed A
IO1 On HPCI+ Active Passed A
IO9 - Empty Slot Available - Isolated
IO10 - Empty Slot Available - Isolated
IO11 - Empty Slot Available - Isolated
IO12 - Empty Slot Available - Isolated
IO13 - Empty Slot Available - Isolated
IO14 - Empty Slot Available - Isolated
IO15 - Empty Slot Available - Isolated
IO16 - Empty Slot Available - Isolated
IO17 - Empty Slot Available - Isolated



EXAMPLES
Example 1: Assigning Boards to Domain C

To assign four boards to domain C you must have platform
privileges or domain privileges and the boards must be in
the domain available component list.

sc0:sms-user:> addboard -d C -c assign SB0 IO1 SB1 SB2
SB0 assigned to domain: C
IO1 assigned to domain: C
SB1 assigned to domain: C
SB2 assigned to domain: C

E20K SMS Upgrade from SMS 1.5 to SMS 1.6

SC PREREQUISITES,
Solaris 9 OS Requirements for SCs
As a minimum, the Solaris 9 OS version of SMS 1.6 requires:
■ Solaris 9 4/04 OS release
■ Same version of SMS software on both system controllers

Patches to be installed on the SC before starting the upgrade:
■ 113027-03 patch
■ 111712-12 patch (for UltraSPARC® IV+ 1800 MHz only)

Domain PREREQUISITES,
Solaris 9 OS Requirements for Domains
As a minimum, the Solaris 9 OS requires:
■ Solaris 9 4/04 OS release.
■ Patches:
■ 113027-03 patch
■ 112233-09 patch

Solaris 10 OS Requirements for Domains
As a minimum, the Solaris 10 OS requires:
■ Solaris 10 3/05 (minimum for UltraSPARC III+, UltraSPARC III++, and
UltraSPARC IV)
■ Solaris 10 3/05 HW1 (minimum for UltraSPARC IV+ 1500 MHz and 1800 MHz
only)
The Solaris 10 1/06 OS release is supported on the domains on all processors.

1. Gather the superuser passwords for both SCs.
2. Be sure you have platadmn privileges to both SCs.
3. On both SCs, determine the directory into which you will download the SMS
software from the web.
4. Ensure that the SC data is synchronized between the two SCs by typing this
command on the main SC.

root@tatatel12-25k-sc0-eri0 # /opt/SUNWSMS/bin/setdatasync backup

5. Ensure that both SC clocks are phase-locked. You can do this by looking at the
most recent messages in the platform logs, which say whether the SC clocks are
locked or not.

Preparing for Installation
1) Login to the Main SC (SC0)
2) Disable failover,

root@tatatel12-25k-sc0-eri0:sms-svc:># /opt/SUNWSMS/bin/setfailover off
3) Backup the current configuration on SC0
1a. Stop SMS
root@tatatel12-25k-sc0-eri0:sms-svc:># /etc/init.d/sms stop

2b. run smsbackup
root@tatatel12-25k-sc0-eri0:sms-svc:># /opt/SUNWSMS/bin/smsbackup directory_name

3c. Start SMS
root@tatatel12-25k-sc0-eri0:sms-svc:># /etc/init.d/sms start

4) Login to the Spare SC (SC1) as root
5) Backup the current configuration on SC1
1a. Stop SMS
root@tatatel12-25k-sc1-eri0:sms-svc:># /etc/init.d/sms stop

2b. run smsbackup
root@tatatel12-25k-sc1-eri0:sms-svc:># /opt/SUNWSMS/bin/smsbackup directory_name

SMS Upgrade on Spare SC:

1. Login to spare SC (SC1)
2. ftp the Solaris 9 SMS 1.6 package (SMS-1_6-S9-sparc.zip) to /sun on the spare SC (SC1)
3. root@tatatel12-25k-sc1-eri0:# cd /sun
4. root@tatatel12-25k-sc1-eri0:# unzip SMS-1_6-S9-sparc.zip
5. root@tatatel12-25k-sc1-eri0:# cd /sun/sms_1_6_sparc/System_Management_Services_1.6/Tools
6. root@tatatel12-25k-sc1-eri0:# ./smsupgrade /sun/sms_1_6_sparc/System_Management_Services_1.6/Product

7. Install all the SMS 1.6 patches on the spare SC which are available on EIS CD.


Switch Control to the Spare SC
1. Log in to the main system controller (sc0) as superuser.
2. Stop SMS.
root@tatatel12-25k-sc0-eri0:sms-svc:># /etc/init.d/sms stop
3. Log in to the spare SC (sc1) and change to the OpenBoot PROM prompt.
root@tatatel12-25k-sc1eri0:># shutdown -y -g0 -i0

4. Reboot the spare SC.
root@tatatel12-25k-sc1eri0:># boot -rv

To Upgrade the Spare SC Flash PROMs

1. Log in to the spare SC as a user with platadmn privileges.

2. root@tatatel12-25k-sc1-eri0:sms-svc:># flashupdate -f /opt/SUNWSMS/firmware/SCOBPimg.di sc1/fp0

3. root@tatatel12-25k-sc1-eri0:sms-svc:># flashupdate -f /opt/SUNWSMS/firmware/oSSCPOST.di sc1/fp1

4. root@tatatel12-25k-sc1-eri0:sms-svc:># /etc/init.d/sms stop

5. root@tatatel12-25k-sc1-eri0:sms-svc:># shutdown -y -g0 -i0

6. root@tatatel12-25k-sc1-eri0:sms-svc:># boot -rv



SMS Upgrade on Main SC:

1. Login to main SC (SC0)

2. ftp Solaris 9 OS–SMS-1_6-S9-sparc.zip to /sun on main SC (SC0)

3. root@tatatel12-25k-sc0-eri0:# cd /sun

4. root@tatatel12-25k-sc0-eri0:# unzip SMS-1_6-S9-sparc.zip

5. root@tatatel12-25k-sc0-eri0:# cd /sun/sms_1_6_sparc/System_Management_Services_1.6/Tools

6. root@tatatel12-25k-sc0-eri0:# ./smsupgrade /sun/sms_1_6_sparc/System_Management_Services_1.6/Product

7. Install all the SMS 1.6 patches on the main SC which are available on EIS CD.


To Upgrade the Main SC Flash PROMs
1. Log in to the main SC as a user with platadmn privileges.

2. root@tatatel12-25k-sc0-eri0:sms-svc:># flashupdate -f /opt/SUNWSMS/firmware/SCOBPimg.di sc0/fp0


3. root@tatatel12-25k-sc0-eri0:sms-svc:># flashupdate -f /opt/SUNWSMS/firmware/oSSCPOST.di sc0/fp1


To Switch Control Back to the Main SC

1. Log in to the spare SC (sc1) as superuser.

2. root@tatatel12-25k-sc1-eri0:sms-svc:># /etc/init.d/sms stop

3. Log in to the main SC (sc0) and change to the OpenBoot PROM prompt.

4. root@tatatel12-25k-sc0-eri0:># shutdown -y -g0 -i0

5. root@tatatel12-25k-sc0-eri0:># boot -rv

6. Use the /etc/init.d/sms script to restart SMS on the spare SC
root@tatatel12-25k-sc1-eri0:># /etc/init.d/sms start

7. Enable failover
root@tatatel12-25k-sc0-eri0:sms-svc># /opt/SUNWSMS/bin/setfailover on

8. Verify that failover is working.
root@tatatel12-25k-sc0-eri0:sms-svc># /opt/SUNWSMS/bin/showfailover -v


To Upgrade the System Board Flash PROMs
1. Log in to the main SC as a user with platadmn privileges.

2. Use flashupdate to upgrade the CPU flash PROMs in a domain.
root@tatatel12-25k-sc0-eri0:sms-svc># flashupdate -d domain_indicator -f /opt/SUNWSMS/hostobjs/sgcpu.flash

Thursday, June 17, 2010

Scsetup - Modifications

#------------------------------------------------------------
# delete a metaset...

# check status
metastat -p -s Kencu1-DG

# remove all soft partitions and concats...
metaclear -s Kencu1-DG d1
metaclear -s Kencu1-DG d2
metaclear -s Kencu1-DG d3
metaclear -s Kencu1-DG d0

# remove all disks from the metaset...
metaset -s Kencu1-DG -d /dev/did/rdsk/d5

# you might have to force it "-f"
metaset -s Kencu1-DG -d -f /dev/did/rdsk/d5

# remove all hosts from diskset
metaset -s Kencu1-DG -d -h test
metaset -s Kencu1-DG -d -h test1
metaset -s Kencu1-DG -d -f -h test2

# check that the diskset no longer exists
metaset
scstat -D

#------------------------------------------------------------
# create a new diskset

# create metaset and mediators
metaset -s new-cu1 -a -h PKEND022 PKEND023
metaset -s new-cu1 -a -m PKEND022 PKEND023

# add disk to the metaset
metaset -s new-cu1 -a /dev/did/rdsk/d5

# check status
metaset -s new-cu1
metadb -s new-cu1
medstat -s new-cu1

# create the first concat
metainit -s new-cu1 d0 1 1 /dev/did/rdsk/d5s0

# create soft partitions (only if required; in our case it's not required)
metainit -s new-cu1 d1 -p d0 100m
metainit -s new-cu1 d2 -p d0 2g
metainit -s new-cu1 d3 -p d0 2g
metainit -s new-cu1 d4 -p d0 1.5g

#------------------------------------------------------------
# create resource group
scrgadm -a -g Kencu1-DG -h PKEND022,PKEND023 -y RG_description="KENCU1-DG"

# create StoragePlus resource
scrgadm -a -j kencu1-hasp-rs -t SUNW.HAStoragePlus -g Kencu1-DG \
-x FileSystemMountPoints=/s1/kencu2/oracle,/s1/kencu2/oradata01,/s1/kencu2/oradata02 \
-x AffinityOn=true

# create logical hostname resource
scrgadm -a -L -g Kencu1-DG -j kencu1-lh-rs -l PKENCU2

# Resource Group commands

# check status of all resource groups/resources
scstat -g

# shutdown a resource group
scswitch -F -g resource_group_name

# start a resource group
scswitch -Z -g resource_group_name

# failover a resource group to another node
scswitch -z -g resource_group_name -h node_name

# restart a resource group
scswitch -R -g resource_group_name -h node_name

# evacuate all resources and resource groups from a node
scswitch -S -h node_name

#------------------------------------------------------------
# Resource commands

# disable a resource and its fault monitor
scswitch -n -j resource_name

# enable a resource and its fault monitor
scswitch -e -j resource_name

# clear the STOP_FAILED flag of a resource
scswitch -c -j resource_name -h node_name -f STOP_FAILED

#------------------------------------------------------------
# Shutdown the ENTIRE cluster
scshutdown

#------------------------------------------------------------
# View properties of Resource Groups/Resources
# Use "-v" to increase verbosity
scrgadm -p -g resource_group_name
scrgadm -p -j resource_name


#------------------------------------------------------------
# Add in ipmp tests and maintenance procedures

#------------------------------------------------------------
# Add a new LUN on the fly...

# probe/display all fibre attached devices
cfgadm -al

# look for the WWN numbers given by storage

root@PKEND022 # cfgadm -al
Ap_Id Type Receptacle Occupant Condition
c0 scsi-bus connected configured unknown
c0::dsk/c0t0d0 CD-ROM connected configured unknown
c1 fc-private connected configured unknown
c1::210000008741a927 disk connected configured unknown
c1::2100000087968e21 disk connected configured unknown
c2 fc-fabric connected configured unknown
c2::50060e80042d0a24 disk connected configured unusable
c3 fc-fabric connected unconfigured unknown
c3::50060e80042d0a06 disk connected unconfigured unknown <-- this one (and c5::50060e80042d0a16 below)
c4 fc-fabric connected configured unknown
c4::50060e80042d0a34 disk connected configured unusable
c5 fc-fabric connected unconfigured unknown
c5::50060e80042d0a16 disk connected unconfigured unknown
c7 scsi-bus connected unconfigured unknown
c8 scsi-bus connected unconfigured unknown
usb0/1 unknown empty unconfigured ok
usb0/2 unknown empty unconfigured ok
usb0/3 unknown empty unconfigured ok
usb0/4 unknown empty unconfigured ok
root@PKEND022 #


# if it's not already configured then configure the disk
cfgadm -c configure c3::50060e80042d0a06 c5::50060e80042d0a16

# this one shows lun hex id's too!!
cfgadm -al -o show_FCP_dev c3::50060e80042d0a06

# else confirm with...
# format
Searching for disks...done

c6t500060E80000000000009CBB00000484d0: configured with capacity of 96.28GB
c6t500060E80000000000009CBB00000492d0: configured with capacity of 96.28GB


AVAILABLE DISK SELECTIONS:
0. c1t0d0
/ssm@0,0/pci@18,600000/scsi@2/sd@0,0
1. c1t1d0
/ssm@0,0/pci@18,600000/scsi@2/sd@1,0
2. c6t500060E80000000000009CBB0000043Dd0
/scsi_vhci/ssd@g500060e80000000000009cbb0000043d
3. c6t500060E80000000000009CBB00000051d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000051
4. c6t500060E80000000000009CBB00000141d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000141
5. c6t500060E80000000000009CBB00000430d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000430
6. c6t500060E80000000000009CBB00000442d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000442
7. c6t500060E80000000000009CBB00000443d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000443
8. c6t500060E80000000000009CBB00000444d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000444
9. c6t500060E80000000000009CBB00000445d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000445
10. c6t500060E80000000000009CBB00000480d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000480
11. c6t500060E80000000000009CBB00000484d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000484
12. c6t500060E80000000000009CBB00000492d0
/scsi_vhci/ssd@g500060e80000000000009cbb00000492
Specify disk (enter its number):

# the label of the disk should give you an indication of which one you are after (ie 128Gb = OPEN-9*14)

# then confirm your suspicions with...
# luxadm display /dev/rdsk/c6t500060E80000000000009CBB00000492d0s2
DEVICE PROPERTIES for disk: /dev/rdsk/c6t500060E80000000000009CBB00000492d0s2
Vendor: HITACHI
Product ID: OPEN-9*14 -SUN
Revision: 0117
Serial Num: 40123
Unformatted capacity: 103384.352 MBytes
Write Cache: Enabled
Read Cache: Enabled
Minimum prefetch: 0x0
Maximum prefetch: 0x0
Device Type: Disk device
Path(s):

/dev/rdsk/c6t500060E80000000000009CBB00000492d0s2
/devices/scsi_vhci/ssd@g500060e80000000000009cbb00000492:c,raw
Controller /devices/ssm@0,0/pci@18,600000/SUNW,qlc@1/fp@0,0
Device Address 500060e8029cbb08,c <-- check this line
Host controller port WWN 210000e08b0a3bfe
Class primary
State ONLINE

# the Device Address line above should correlate with the WWN and Hex address that
# the Storage boys supply you with.

# create new did instances for these devices on each node
# scdidadm -r
did instance 21 created.
did subpath mulloway:/dev/rdsk/c6t500060E80000000000009CBB00000484d0 created for instance 21.
did instance 22 created.
did subpath mulloway:/dev/rdsk/c6t500060E80000000000009CBB00000492d0 created for instance 22.
root@pkend023:/root
root@pkend022:/root
# scdidadm -r
did subpath /dev/rdsk/c6t500060E80000000000009CBB00000484d0s2 created for instance 21.
did subpath /dev/rdsk/c6t500060E80000000000009CBB00000492d0s2 created for instance 22.
root@pkend022:/root
root@pkend022:/root
# scdidadm -L|grep c6t500060E80000000000009CBB00000484d0
21 manta:/dev/rdsk/c6t500060E80000000000009CBB00000484d0 /dev/did/rdsk/d21
21 mulloway:/dev/rdsk/c6t500060E80000000000009CBB00000484d0 /dev/did/rdsk/d21
21 marlin:/dev/rdsk/c6t500060E80000000000009CBB00000484d0 /dev/did/rdsk/d21
root@pkend022:/root
#

# update the global devices namespace
scgdevs

# if it's a new LUN size you've been given, create a new label in /etc/format.dat.
# These 100Gb ones didn't seem to work using an explicit entry in /etc/format.dat.
# In this case, just add the type manually thru "format".
# Use the Hitachi disk spec manual for values.
# At any rate, label and partition the disks. Make slice 7 20Mb, and add the rest of the disk into
# slice 0.

#------------------------------------------------------------
# Remove a LUN on the fly...
# If you don't know the diskset/device(s) to remove, the storage
# boys will provide you with the WWN and LUN ID
# ie 50060e80042d0a06, LUN x'06'

# Issue a luxadm display using the WWN
# All luns on that port will be displayed.
# Look at the "Device Address" field to find the right lun.

DEVICE PROPERTIES for disk: 50060e80042d0a06
Vendor: HITACHI
Product ID: OPEN-9 -SUN
Revision: 0119
Serial Num: 40123
Unformatted capacity: 7384.597 MBytes
Write Cache: Enabled
Read Cache: Enabled
Minimum prefetch: 0x0
Maximum prefetch: 0x0
Device Type: Disk device
Path(s):

/dev/rdsk/c6t500060E80000000000009CBB00000444d0s2
/devices/scsi_vhci/ssd@g500060e80000000000009cbb00000444:c,raw
Controller /devices/ssm@0,0/pci@18,600000/SUNW,qlc@1/fp@0,0
Device Address 500060e8029cbb08,6
Host controller port WWN 210000e08b0aa6fd
Class primary
State ONLINE
Controller /devices/ssm@0,0/pci@19,700000/SUNW,qlc@3/fp@0,0
Device Address 500060e8029cbb18,6
Host controller port WWN 210000e08b0e0622
Class primary
State ONLINE

DEVICE PROPERTIES for disk: 500060e8029cbb08
Vendor: HITACHI
Product ID: OPEN-9 -SUN
Revision: 0119
Serial Num: 40123
Unformatted capacity: 7384.597 MBytes
Write Cache: Enabled
Read Cache: Enabled
Minimum prefetch: 0x0
Maximum prefetch: 0x0
Device Type: Disk device
Path(s):

/dev/rdsk/c6t500060E80000000000009CBB00000445d0s2
/devices/scsi_vhci/ssd@g500060e80000000000009cbb00000445:c,raw
Controller /devices/ssm@0,0/pci@18,600000/SUNW,qlc@1/fp@0,0
Device Address 500060e8029cbb08,7
Host controller port WWN 210000e08b0aa6fd
Class primary
State ONLINE
Controller /devices/ssm@0,0/pci@19,700000/SUNW,qlc@3/fp@0,0
Device Address 500060e8029cbb18,7
Host controller port WWN 210000e08b0e0622
Class primary
State ONLINE

# Once you have this info, you have the disk device name like...
/dev/rdsk/c6t500060E80000000000009CBB00000444d0s2

# Use this to find the did device name
scdidadm -L |grep c6t500060E80000000000009CBB00000444d0
4 pkend022:/dev/rdsk/c6t500060E80000000000009CBB00000444d0 /dev/did/rdsk/d4
4 pkend023:/dev/rdsk/c6t500060E80000000000009CBB00000444d0 /dev/did/rdsk/d4
4 pkend024:/dev/rdsk/c6t500060E80000000000009CBB00000444d0 /dev/did/rdsk/d4

scdidadm -L |grep c6t500060E80000000000009CBB00000445d0
9 pkend022:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9
9 pkend023:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9
9 pkend024:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9

# So it's d4 and d9 I want to remove
# Check for their existence in metasets

metaset|grep d9

# If there's any output, you'd better take a close look at the whole output
# to find which metaset it belongs to.
# If it belongs to a metaset, remove all filesystems partitions etc.
# Finally delete the metaset.

# If you have HDS SCSI reserve errors when trying to deallocate the lun...


# Check for SCSI3 reserves using the undocumented utility /usr/cluster/lib/sc/reserve.
# Use either the did or the OS device file.
root@marlin:/usr/cluster/lib/sc
$ ./reserve -c inkeys -z /dev/did/rdsk/d9s2
Reservation keys(3):
0x3f8a0ed500000003
0x3f8a0ed500000001
0x3f8a0ed500000002
root@pkend022:/usr/cluster/lib/sc
$ scdidadm -L|grep d9
9 manta:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9
9 marlin:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9
9 mulloway:/dev/rdsk/c6t500060E80000000000009CBB00000445d0 /dev/did/rdsk/d9
root@pkend023:/usr/cluster/lib/sc
$ ./reserve -c inkeys -z /dev/rdsk/c6t500060E80000000000009CBB00000445d0s2
Reservation keys(3):
0x3f8a0ed500000003
0x3f8a0ed500000001
0x3f8a0ed500000002
root@marlin:/usr/cluster/lib/sc
$




root@marlin:/usr/cluster/lib/sc
$ ./reserve -c scrub -z /dev/rdsk/c6t500060E80000000000009CBB00000445d0s2
Reservation keys currently on disk:
0x3f8a0ed500000003
0x3f8a0ed500000001
0x3f8a0ed500000002
Attempting to remove all keys from the disk...
May 26 17:44:57 marlin last message repeated 1 time
May 26 17:46:44 marlin scsi: WARNING: /scsi_vhci/ssd@g500060e80000000000009cbb00000445 (ssd5):
May 26 17:46:44 marlin Error for Command: Error Level: Informational
Scrubbing complete, use 'reserve -c inkeys -z /dev/rdsk/c6t500060E80000000000009CBB00000445d0s2' to verify success
root@marlin:/usr/cluster/lib/sc
$ May 26 17:46:44 marlin scsi: Requested Block: 0 Error Block: 0
May 26 17:46:44 marlin scsi: Vendor: HITACHI Serial Number: 04009CBB0445
May 26 17:46:44 marlin scsi: Sense Key: Unit Attention
May 26 17:46:44 marlin scsi: ASC: 0x2a (), ASCQ: 0x4, FRU: 0x0

root@marlin:/usr/cluster/lib/sc
$ ./reserve -c inkeys -z /dev/rdsk/c6t500060E80000000000009CBB00000445d0s2
Reservation keys(0):
root@marlin:/usr/cluster/lib/sc

# run devfsadm to remove device files
devfsadm -C -c disk

# clean up the did devices
scdidadm -C


#------------------------------------------------------------
# create a new diskset

# create metaset and mediators
metaset -s ds04 -a -h manta mulloway marlin
metaset -s ds04 -a -m mulloway manta

# add disk to the metaset
metaset -s ds04 -a /dev/did/rdsk/d21 /dev/did/rdsk/d22

# check status
metaset -s ds04
metadb -s ds04
medstat -s ds04

# create the first concat
metainit -s ds04 d0 2 1 /dev/did/rdsk/d21s0 1 /dev/did/rdsk/d22s0

# create soft partitions
root@manta:init.d
# metainit -s ds04 d1 -p d0 10g
d1: Soft Partition is setup
root@manta:init.d
# metainit -s ds04 d2 -p d0 10g
d2: Soft Partition is setup
root@manta:init.d
#

# create default ufs filesystems
newfs /dev/md/ds04/rdsk/d1
newfs /dev/md/ds04/rdsk/d2

# check required filesystem settings using...
# mkfs -m /dev/md/ds04/rdsk/d1
mkfs -F ufs -o nsect=120,ntrack=56,bsize=8192,fragsize=1024,cgsize=16,free=1,rps=166,nbpi=8239,opt=t,apc=0,gap=0,nrpos=8,maxcontig=16 /dev/md/ds04/rdsk/d1 20971520


# create resource group
scrgadm -a -g super1 -h manta,mulloway,marlin -y RG_description="Summit Production"

# create StoragePlus resource
scrgadm -a -j super1-ds04 -t SUNW.HAStoragePlus -g super1 \
-x FileSystemMountPoints=/opt/smt,/opt/oraclest \
-x AffinityOn=true


# create logical hostname resource
scrgadm -a -L -g super1 -j super1-ip -l super1


# create the super1 apache application resource
scrgadm -a -j super1-apache -t EUM.super1 -g super1 -y Resource_dependencies=super1-ds04 -x Eum_admin_dir=/opt/smt/admin/bin