Add tests for stale lock killing and platform.process_alive

This commit is contained in:
Marian Beermann 2016-10-02 10:54:36 +02:00
parent 2bd8ac7762
commit cc14975f2d
4 changed files with 116 additions and 27 deletions

View File

@ -130,8 +130,7 @@ class ExclusiveLock:
except FileExistsError: # already locked
if self.by_me():
return self
if self.kill_stale_lock():
pass
self.kill_stale_lock()
if timer.timed_out_or_sleep():
raise LockTimeout(self.path)
except OSError as err:
@ -167,7 +166,7 @@ class ExclusiveLock:
# It's safer to just exit
return False
if not platform.process_alive(host, pid, thread):
if platform.process_alive(host, pid, thread):
return False
if not self.ok_to_kill_stale_locks:
@ -224,17 +223,17 @@ class LockRoster:
# Just nuke the stale locks early on load
if self.ok_to_kill_zombie_locks:
for key in (SHARED, EXCLUSIVE):
elements = set()
try:
for e in data[key]:
(host, pid, thread) = e
if not platform.process_alive(host, pid, thread):
elements.add(tuple(e))
else:
logger.warning('Removed stale %s roster lock for pid %d.', key, pid)
data[key] = list(list(e) for e in elements)
entries = data[key]
except KeyError:
pass
continue
elements = set()
for host, pid, thread in entries:
if platform.process_alive(host, pid, thread):
elements.add((host, pid, thread))
else:
logger.warning('Removed stale %s roster lock for pid %d.', key, pid)
data[key] = list(elements)
except (FileNotFoundError, ValueError):
# no or corrupt/empty roster file?
data = {}

View File

@ -35,18 +35,23 @@ def get_process_id():
def process_alive(host, pid, thread):
    """
    Check if the (host, pid, thread_id) combination corresponds to a potentially alive process.

    If the process is local, then this will be accurate. If the process is not local, then this
    returns always True, since there is no real way to check.

    :param host: host name recorded for the process; compared against this node's host name
    :param pid: process id to probe when *host* is the local node
    :param thread: thread id recorded for the process
    :return: False only when the process is local and its pid is not alive, True otherwise
    """
    from . import local_pid_alive

    if host != _hostname:
        # A process on another node cannot be probed from here, so it has to be
        # treated as (potentially) alive.
        return True

    if thread != 0:
        # Currently thread is always 0, if we ever decide to set this to a non-zero value,
        # this code needs to be revisited, too, to do a sensible thing
        return True

    # Call the helper; returning the bare function object would always be truthy.
    return local_pid_alive(pid)
def local_pid_alive(pid):
"""Return whether *pid* is alive."""
@ -62,4 +67,4 @@ def local_pid_alive(pid):
# ESRCH = no such process
return False
# Any other error (e.g. permissions) means that the process ID refers to a live process
return True
return True

View File

@ -1,22 +1,25 @@
import random
import time
import pytest
from ..locking import get_id, TimeoutTimer, ExclusiveLock, Lock, LockRoster, \
ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout
from ..platform import get_process_id, process_alive
from ..locking import TimeoutTimer, ExclusiveLock, Lock, LockRoster, \
ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout, NotLocked, NotMyLock
ID1 = "foo", 1, 1
ID2 = "bar", 2, 2
def test_id():
    """get_id() must yield a (hostname, pid, thread id) triple with sane types and values."""
    name, process_id, thread_id = get_id()
    # type checks first, in the same order as the tuple elements
    assert isinstance(name, str)
    assert isinstance(process_id, int)
    assert isinstance(thread_id, int)
    # value checks: non-empty host name, positive pid
    assert len(name) > 0
    assert process_id > 0
@pytest.fixture()
def free_pid():
    """Return a PID that no process currently uses (racy: it may be taken right afterwards)."""
    host, _own_pid, tid = get_process_id()
    # PIDs are often restricted to a small range. On Linux the range >32k is by default not used,
    # so candidates are drawn from there until one is reported as not alive.
    candidate = random.randint(33000, 65000)
    while process_alive(host, candidate, tid):
        candidate = random.randint(33000, 65000)
    return candidate
class TestTimeoutTimer:
@ -57,6 +60,22 @@ class TestExclusiveLock:
with pytest.raises(LockTimeout):
ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire()
def test_kill_stale(self, lockpath, free_pid):
    """An exclusive lock of a dead local process is taken over; one of an unknown host is not."""
    our_id = get_process_id()
    host, _pid, tid = our_id
    dead_id = (host, free_pid, tid)
    cant_know_if_dead_id = ('foo.bar.example.net', 1, 2)

    # Acquire under an id whose pid is not in use, then take the lock under our own id.
    stale_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
    with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True), pytest.raises(NotMyLock):
        # while we hold the lock, releasing through the stale object is rejected
        stale_lock.release()
    with pytest.raises(NotLocked):
        # after we released, nothing is locked any more
        stale_lock.release()

    # A lock held under a foreign host name must not be taken over.
    with ExclusiveLock(lockpath, id=cant_know_if_dead_id), pytest.raises(LockTimeout):
        ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
class TestLock:
def test_shared(self, lockpath):
@ -117,6 +136,25 @@ class TestLock:
with pytest.raises(LockTimeout):
Lock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire()
def test_kill_stale(self, lockpath, free_pid):
    """A stale exclusive Lock of a dead local process is dropped; one of an unknown host is kept."""
    our_id = get_process_id()
    host, _pid, tid = our_id
    dead_id = (host, free_pid, tid)
    cant_know_if_dead_id = ('foo.bar.example.net', 1, 2)

    # Exclusive lock registered under an id whose pid is not in use.
    stale_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
    roster = stale_lock._roster
    with Lock(lockpath, id=our_id, kill_stale_locks=True):
        # while our (shared) lock is held, only our id is in the roster
        assert roster.get(EXCLUSIVE) == set()
        assert roster.get(SHARED) == {our_id}
    # after leaving the context the roster is empty for both kinds
    assert roster.get(EXCLUSIVE) == set()
    assert roster.get(SHARED) == set()
    with pytest.raises(KeyError):
        stale_lock.release()

    # A lock held under a foreign host name must not be taken over.
    with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True), pytest.raises(LockTimeout):
        Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
@pytest.fixture()
def rosterpath(tmpdir):
@ -144,3 +182,28 @@ class TestLockRoster:
roster2 = LockRoster(rosterpath, id=ID2)
roster2.modify(SHARED, REMOVE)
assert roster2.get(SHARED) == set()
def test_kill_stale(self, rosterpath, free_pid):
    """Roster entries of dead local processes are removed on load when killing stale locks."""
    our_id = get_process_id()
    host, _pid, tid = our_id
    dead_id = (host, free_pid, tid)
    cant_know_if_dead_id = ('foo.bar.example.net', 1, 2)

    # Register a shared entry under an id whose pid is not in use.
    roster = LockRoster(rosterpath, id=dead_id)
    assert roster.get(SHARED) == set()
    roster.modify(SHARED, ADD)
    assert roster.get(SHARED) == {dead_id}

    # Register a second shared entry under a foreign host name.
    roster = LockRoster(rosterpath, id=cant_know_if_dead_id)
    assert roster.get(SHARED) == {dead_id}
    roster.modify(SHARED, ADD)
    assert roster.get(SHARED) == {dead_id, cant_know_if_dead_id}

    killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
    # The dead process's entry is gone; the foreign-host entry is still there.
    assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
    killer_roster.modify(SHARED, ADD)
    assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}

    other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
    # Our own entry is not removed, since we're alive.
    assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}

View File

@ -1,5 +1,6 @@
import functools
import os
import random
import shutil
import sys
import tempfile
@ -7,7 +8,9 @@ import pwd
import unittest
from ..platform import acl_get, acl_set, swidth
from ..platform import get_process_id, process_alive
from . import BaseTestCase, unopened_tempfile
from .locking import free_pid
ACCESS_ACL = """
@ -186,3 +189,22 @@ class PlatformPosixTestCase(BaseTestCase):
def test_swidth_mixed(self):
    """swidth() of a string mixing ASCII and Japanese characters."""
    # presumably 4 single-width characters plus 6 double-width ones -- confirm against swidth
    expected_width = 4 + 6 * 2
    self.assert_equal(swidth("borgバックアップ"), expected_width)
def test_process_alive(free_pid):
    """process_alive() for our own id, a foreign host, a changed thread id and an unused pid."""
    own_id = get_process_id()
    assert process_alive(*own_id)
    host, pid, tid = own_id
    # a different host name is expected to be reported as alive
    assert process_alive(host + 'abc', pid, tid)
    # a different thread id is expected to be reported as alive
    assert process_alive(host, pid, tid + 1)
    # a local pid that is not in use must be reported as not alive
    assert not process_alive(host, free_pid, tid)
def test_process_id():
    """get_process_id() yields a stable (hostname, pid, thread id) triple of sane types and values."""
    first_id = get_process_id()
    hostname, pid, tid = first_id
    # type checks, in tuple order
    assert isinstance(hostname, str)
    assert isinstance(pid, int)
    assert isinstance(tid, int)
    # value checks: non-empty host name, positive pid
    assert len(hostname) > 0
    assert pid > 0
    # a second call must return the same triple
    assert get_process_id() == (hostname, pid, tid)