[sheepdog] Bug: strange behavior after restarting sheeps - potential loss of data

Jens WEBER jweber at tek2b.org
Tue Jul 17 16:49:41 CEST 2012


What I've done:

root at sheep01:/home/jens/sheepdog/debian# collie cluster format -H -c 1 
using backend farm store
root at sheep01:/home/jens/sheepdog/debian# collie vdi create  test 50M -P
root at sheep01:/home/jens/sheepdog/debian# collie node info
Id	Size	Used	Use%
 0	238 MB	32 MB	 13%
 1	238 MB	24 MB	 10%
Total	475 MB	56 MB	 11%

Total virtual image size	50 MB
root at sheep01:/home/jens/sheepdog/debian# /etc/init.d/sheepdog stop
[ ok ] Stopping sheepdog: sheepdog.
root at sheep01:/home/jens/sheepdog/debian# /etc/init.d/sheepdog start
[ ok ] Starting sheepdog : sheepdog.
root at sheep01:/home/jens/sheepdog/debian# collie node info
Id	Size	Used	Use%
 0	238 MB	32 MB	 13%
 1	238 MB	0.0 MB	  0% <---- strange, isn't it? (node 1 showed 24 MB used before the restart)
Total	475 MB	32 MB	  6%

Total virtual image size	50 MB

root at sheep01:/home/jens/sheepdog/debian# tail -n 40 /var/lib/sheepdog/disc1/sheep.log 
Jul 17 16:38:55 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:55 [main] clear_client(709) refcnt:0, fd:15, 172.30.0.80:34198
Jul 17 16:38:55 [main] destroy_client(678) connection from: 172.30.0.80:34198
Jul 17 16:38:55 [io 3] do_process_work(1029) 15, 0 , 3
Jul 17 16:38:55 [main] client_tx_handler(669) connection from: 14, 172.30.0.80:34197
Jul 17 16:38:55 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:55 [main] clear_client(709) refcnt:0, fd:14, 172.30.0.80:34197
Jul 17 16:38:55 [main] destroy_client(678) connection from: 172.30.0.80:34197
Jul 17 16:38:57 [main] listen_handler(825) accepted a new connection: 13
Jul 17 16:38:57 [main] client_rx_handler(583) connection from: 13, 127.0.0.1:46561
Jul 17 16:38:57 [main] queue_request(329) GET_NODE_LIST
Jul 17 16:38:57 [io 4] do_process_work(1029) 82, 0 , 3
Jul 17 16:38:57 [main] client_tx_handler(669) connection from: 13, 127.0.0.1:46561
Jul 17 16:38:57 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:57 [main] clear_client(709) refcnt:0, fd:13, 127.0.0.1:46561
Jul 17 16:38:57 [main] destroy_client(678) connection from: 127.0.0.1:46561
Jul 17 16:38:57 [main] listen_handler(825) accepted a new connection: 13
Jul 17 16:38:57 [main] client_rx_handler(583) connection from: 13, 172.30.0.80:34201
Jul 17 16:38:57 [main] queue_request(329) STAT_SHEEP
Jul 17 16:38:57 [io 5] do_process_work(1029) 86, 0 , 3
Jul 17 16:38:57 [main] client_tx_handler(669) connection from: 13, 172.30.0.80:34201
Jul 17 16:38:57 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:57 [main] clear_client(709) refcnt:0, fd:13, 172.30.0.80:34201
Jul 17 16:38:57 [main] destroy_client(678) connection from: 172.30.0.80:34201
Jul 17 16:38:57 [main] listen_handler(825) accepted a new connection: 13
Jul 17 16:38:57 [main] client_rx_handler(583) connection from: 13, 127.0.0.1:46564
Jul 17 16:38:57 [main] queue_request(329) READ_VDIS
Jul 17 16:38:57 [io 6] do_process_work(1029) 15, 0 , 3
Jul 17 16:38:57 [main] client_tx_handler(669) connection from: 13, 127.0.0.1:46564
Jul 17 16:38:57 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:57 [main] clear_client(709) refcnt:0, fd:13, 127.0.0.1:46564
Jul 17 16:38:57 [main] destroy_client(678) connection from: 127.0.0.1:46564
Jul 17 16:38:57 [main] listen_handler(825) accepted a new connection: 13
Jul 17 16:38:57 [main] client_rx_handler(583) connection from: 13, 127.0.0.1:46565
Jul 17 16:38:57 [main] queue_request(329) READ_OBJ
Jul 17 16:38:57 [gateway 7] do_process_work(1029) 2, 807c2b2500000000 , 3
Jul 17 16:38:57 [main] client_tx_handler(669) connection from: 13, 127.0.0.1:46565
Jul 17 16:38:57 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:57 [main] clear_client(709) refcnt:0, fd:13, 127.0.0.1:46565
Jul 17 16:38:57 [main] destroy_client(678) connection from: 127.0.0.1:46565
root at sheep01:/home/jens/sheepdog/debian# tail -n 40 /var/lib/sheepdog/disc2/sheep.log 
Jul 17 16:38:55 [main] create_cluster(1124) zone id = 2
Jul 17 16:38:55 [main] send_join_request(964) IPv4 ip:172.30.0.80 port:7001
Jul 17 16:38:55 [main] main(272) sheepdog daemon (version 0.4.0) started
Jul 17 16:38:55 [main] cdrv_cpg_confchg(568) mem:2, joined:1, left:0
Jul 17 16:38:55 [main] cdrv_cpg_confchg(634) Not promoting because member is not in our event list.
Jul 17 16:38:55 [main] cdrv_cpg_deliver(454) 0
Jul 17 16:38:55 [main] cdrv_cpg_deliver(454) 1
Jul 17 16:38:55 [main] sd_join_handler(993) join IPv4 ip:172.30.0.80 port:7001
Jul 17 16:38:55 [main] sd_join_handler(995) [0] IPv4 ip:172.30.0.80 port:7000
Jul 17 16:38:55 [main] sd_join_handler(995) [1] IPv4 ip:172.30.0.80 port:7001
Jul 17 16:38:55 [main] update_cluster_info(780) status = 1, epoch = 2, finished: 0
Jul 17 16:38:55 [main] cleanup_working_dir(635) try clean up working dir
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b2500000004
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b2500000003
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b250000000c
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b2500000009
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b2500000006
Jul 17 16:38:55 [main] cleanup_working_dir(649) remove file 007c2b250000000a
Jul 17 16:38:55 [main] trunk_reset(402) clean
Jul 17 16:38:55 [main] sockfd_cache_add_group(242) 2
Jul 17 16:38:55 [main] update_epoch_log(55) update epoch: 3, 2
Jul 17 16:38:55 [block] connect_to(256) 13, 172.30.0.80:7000
Jul 17 16:38:55 [block] get_vdi_bitmap_from(665) 172.30.0.80:7000
Jul 17 16:38:55 [recovery] prepare_object_list(553) 3
Jul 17 16:38:55 [recovery] fetch_object_list(475) 172.30.0.80 7000
Jul 17 16:38:55 [recovery] connect_to(256) 14, 172.30.0.80:7000
Jul 17 16:38:55 [main] sd_join_handler(1004) join Sheepdog cluster
Jul 17 16:38:55 [recovery] fetch_object_list(502) 8
Jul 17 16:38:55 [recovery] prepare_object_list(582) 0
Jul 17 16:38:55 [main] farm_end_recover(578) old epoch 2
Jul 17 16:38:55 [main] snap_file_write(178) epoch: 2, sha1: 876ff2246ae70e3c41a8c7699bd794f23b971eed
Jul 17 16:38:55 [main] finish_recovery(349) recovery complete: new epoch 3
Jul 17 16:38:57 [main] listen_handler(825) accepted a new connection: 13
Jul 17 16:38:57 [main] client_rx_handler(583) connection from: 13, 172.30.0.80:33799
Jul 17 16:38:57 [main] queue_request(329) STAT_SHEEP
Jul 17 16:38:57 [io 1] do_process_work(1029) 86, 0 , 3
Jul 17 16:38:57 [main] client_tx_handler(669) connection from: 13, 172.30.0.80:33799
Jul 17 16:38:57 [main] client_handler(770) connection seems to be dead
Jul 17 16:38:57 [main] clear_client(709) refcnt:0, fd:13, 172.30.0.80:33799
Jul 17 16:38:57 [main] destroy_client(678) connection from: 172.30.0.80:33799

Another test (wiping both store directories and re-formatting the cluster first) gives the same result:
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# /etc/init.d/sheepdog stop
[ ok ] Stopping sheepdog: sheepdog.
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# rm -r /var/lib/sheepdog/disc1/*
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# rm -r /var/lib/sheepdog/disc2/*
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# /etc/init.d/sheepdog start
[ ok ] Starting sheepdog : sheepdog.
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# collie cluster format -H -c 1 
using backend farm store
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# collie vdi create  test 50M -P
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# ./vdidetails test
Node List

	Id	Host:Port
	0	172.30.0.80:7000
	1	172.30.0.80:7001

Details of vdi test, size 50 MB, blocks 12.5
 - : block does not exist or not allocated, # : block is here

	4MB block	  [node0,node1,...]
	metadata object	: [#-]
	0		: [#-]
	1		: [#-]
	2		: [#-]
	3		: [-#]
	4		: [-#]
	5		: [#-]
	6		: [-#]
	7		: [#-]
	8		: [#-]
	9		: [-#]
	10		: [-#]
	11		: [#-]
	12		: [-#]
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# /etc/init.d/sheepdog stop
[ ok ] Stopping sheepdog: sheepdog.
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# /etc/init.d/sheepdog start
[ ok ] Starting sheepdog : sheepdog.
root at sheep01:/home/jens/sheepdog/sheepdog-tools-0.4.0-0+tek2b/src# ./vdidetails test
Node List

	Id	Host:Port
	0	172.30.0.80:7000
	1	172.30.0.80:7001

Details of vdi test, size 50 MB, blocks 12.5
 - : block does not exist or not allocated, # : block is here

	4MB block	  [node0,node1,...]
	metadata object	: [#-]
	0		: [#-]
	1		: [#-]
	2		: [#-]
	3		: [--]
	4		: [--]
	5		: [#-]
	6		: [--]
	7		: [#-]
	8		: [#-]
	9		: [--]
	10		: [--]
	11		: [#-]
	12		: [--]



More information about the sheepdog mailing list