mirror of https://github.com/borgbackup/borg.git
Add tests for stale lock killing and platform.process_alive
This commit is contained in:
parent
2bd8ac7762
commit
cc14975f2d
|
@ -130,8 +130,7 @@ class ExclusiveLock:
|
|||
except FileExistsError: # already locked
|
||||
if self.by_me():
|
||||
return self
|
||||
if self.kill_stale_lock():
|
||||
pass
|
||||
self.kill_stale_lock()
|
||||
if timer.timed_out_or_sleep():
|
||||
raise LockTimeout(self.path)
|
||||
except OSError as err:
|
||||
|
@ -167,7 +166,7 @@ class ExclusiveLock:
|
|||
# It's safer to just exit
|
||||
return False
|
||||
|
||||
if not platform.process_alive(host, pid, thread):
|
||||
if platform.process_alive(host, pid, thread):
|
||||
return False
|
||||
|
||||
if not self.ok_to_kill_stale_locks:
|
||||
|
@ -224,17 +223,17 @@ class LockRoster:
|
|||
# Just nuke the stale locks early on load
|
||||
if self.ok_to_kill_zombie_locks:
|
||||
for key in (SHARED, EXCLUSIVE):
|
||||
elements = set()
|
||||
try:
|
||||
for e in data[key]:
|
||||
(host, pid, thread) = e
|
||||
if not platform.process_alive(host, pid, thread):
|
||||
elements.add(tuple(e))
|
||||
else:
|
||||
logger.warning('Removed stale %s roster lock for pid %d.', key, pid)
|
||||
data[key] = list(list(e) for e in elements)
|
||||
entries = data[key]
|
||||
except KeyError:
|
||||
pass
|
||||
continue
|
||||
elements = set()
|
||||
for host, pid, thread in entries:
|
||||
if platform.process_alive(host, pid, thread):
|
||||
elements.add((host, pid, thread))
|
||||
else:
|
||||
logger.warning('Removed stale %s roster lock for pid %d.', key, pid)
|
||||
data[key] = list(elements)
|
||||
except (FileNotFoundError, ValueError):
|
||||
# no or corrupt/empty roster file?
|
||||
data = {}
|
||||
|
|
|
@ -35,18 +35,23 @@ def get_process_id():
|
|||
|
||||
|
||||
def process_alive(host, pid, thread):
|
||||
"""Check if the (host, pid, thread_id) combination corresponds to a dead process on our local node or not."""
|
||||
"""
|
||||
Check if the (host, pid, thread_id) combination corresponds to a potentially alive process.
|
||||
|
||||
If the process is local, then this will be accurate. If the process is not local, then this
|
||||
returns always True, since there is no real way to check.
|
||||
"""
|
||||
from . import local_pid_alive
|
||||
|
||||
if host != _hostname:
|
||||
return False
|
||||
return True
|
||||
|
||||
if thread != 0:
|
||||
# Currently thread is always 0, if we ever decide to set this to a non-zero value,
|
||||
# this code needs to be revisited, too, to do a sensible thing
|
||||
return False
|
||||
return True
|
||||
|
||||
return local_pid_alive
|
||||
return local_pid_alive(pid)
|
||||
|
||||
def local_pid_alive(pid):
|
||||
"""Return whether *pid* is alive."""
|
||||
|
@ -62,4 +67,4 @@ def local_pid_alive(pid):
|
|||
# ESRCH = no such process
|
||||
return False
|
||||
# Any other error (eg. permissions) mean that the process ID refers to a live process
|
||||
return True
|
||||
return True
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
import random
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from ..locking import get_id, TimeoutTimer, ExclusiveLock, Lock, LockRoster, \
|
||||
ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout
|
||||
|
||||
from ..platform import get_process_id, process_alive
|
||||
from ..locking import TimeoutTimer, ExclusiveLock, Lock, LockRoster, \
|
||||
ADD, REMOVE, SHARED, EXCLUSIVE, LockTimeout, NotLocked, NotMyLock
|
||||
|
||||
ID1 = "foo", 1, 1
|
||||
ID2 = "bar", 2, 2
|
||||
|
||||
|
||||
def test_id():
|
||||
hostname, pid, tid = get_id()
|
||||
assert isinstance(hostname, str)
|
||||
assert isinstance(pid, int)
|
||||
assert isinstance(tid, int)
|
||||
assert len(hostname) > 0
|
||||
assert pid > 0
|
||||
@pytest.fixture()
|
||||
def free_pid():
|
||||
"""Return a free PID not used by any process (naturally this is racy)"""
|
||||
host, pid, tid = get_process_id()
|
||||
while True:
|
||||
# PIDs are often restricted to a small range. On Linux the range >32k is by default not used.
|
||||
pid = random.randint(33000, 65000)
|
||||
if not process_alive(host, pid, tid):
|
||||
return pid
|
||||
|
||||
|
||||
class TestTimeoutTimer:
|
||||
|
@ -57,6 +60,22 @@ class TestExclusiveLock:
|
|||
with pytest.raises(LockTimeout):
|
||||
ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire()
|
||||
|
||||
def test_kill_stale(self, lockpath, free_pid):
|
||||
host, pid, tid = our_id = get_process_id()
|
||||
dead_id = host, free_pid, tid
|
||||
cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
|
||||
|
||||
dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
|
||||
with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True):
|
||||
with pytest.raises(NotMyLock):
|
||||
dead_lock.release()
|
||||
with pytest.raises(NotLocked):
|
||||
dead_lock.release()
|
||||
|
||||
with ExclusiveLock(lockpath, id=cant_know_if_dead_id):
|
||||
with pytest.raises(LockTimeout):
|
||||
ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
|
||||
|
||||
|
||||
class TestLock:
|
||||
def test_shared(self, lockpath):
|
||||
|
@ -117,6 +136,25 @@ class TestLock:
|
|||
with pytest.raises(LockTimeout):
|
||||
Lock(lockpath, exclusive=True, id=ID2, timeout=0.1).acquire()
|
||||
|
||||
def test_kill_stale(self, lockpath, free_pid):
|
||||
host, pid, tid = our_id = get_process_id()
|
||||
dead_id = host, free_pid, tid
|
||||
cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
|
||||
|
||||
dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
|
||||
roster = dead_lock._roster
|
||||
with Lock(lockpath, id=our_id, kill_stale_locks=True):
|
||||
assert roster.get(EXCLUSIVE) == set()
|
||||
assert roster.get(SHARED) == {our_id}
|
||||
assert roster.get(EXCLUSIVE) == set()
|
||||
assert roster.get(SHARED) == set()
|
||||
with pytest.raises(KeyError):
|
||||
dead_lock.release()
|
||||
|
||||
with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True):
|
||||
with pytest.raises(LockTimeout):
|
||||
Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def rosterpath(tmpdir):
|
||||
|
@ -144,3 +182,28 @@ class TestLockRoster:
|
|||
roster2 = LockRoster(rosterpath, id=ID2)
|
||||
roster2.modify(SHARED, REMOVE)
|
||||
assert roster2.get(SHARED) == set()
|
||||
|
||||
def test_kill_stale(self, rosterpath, free_pid):
|
||||
host, pid, tid = our_id = get_process_id()
|
||||
dead_id = host, free_pid, tid
|
||||
|
||||
roster1 = LockRoster(rosterpath, id=dead_id)
|
||||
assert roster1.get(SHARED) == set()
|
||||
roster1.modify(SHARED, ADD)
|
||||
assert roster1.get(SHARED) == {dead_id}
|
||||
|
||||
cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
|
||||
roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id)
|
||||
assert roster1.get(SHARED) == {dead_id}
|
||||
roster1.modify(SHARED, ADD)
|
||||
assert roster1.get(SHARED) == {dead_id, cant_know_if_dead_id}
|
||||
|
||||
killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
|
||||
# Did kill the dead processes lock (which was alive ... I guess?!)
|
||||
assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
|
||||
killer_roster.modify(SHARED, ADD)
|
||||
assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
|
||||
|
||||
other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
|
||||
# Did not kill us, since we're alive
|
||||
assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import functools
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
@ -7,7 +8,9 @@ import pwd
|
|||
import unittest
|
||||
|
||||
from ..platform import acl_get, acl_set, swidth
|
||||
from ..platform import get_process_id, process_alive
|
||||
from . import BaseTestCase, unopened_tempfile
|
||||
from .locking import free_pid
|
||||
|
||||
|
||||
ACCESS_ACL = """
|
||||
|
@ -186,3 +189,22 @@ class PlatformPosixTestCase(BaseTestCase):
|
|||
|
||||
def test_swidth_mixed(self):
|
||||
self.assert_equal(swidth("borgバックアップ"), 4 + 6 * 2)
|
||||
|
||||
|
||||
def test_process_alive(free_pid):
|
||||
id = get_process_id()
|
||||
assert process_alive(*id)
|
||||
host, pid, tid = id
|
||||
assert process_alive(host + 'abc', pid, tid)
|
||||
assert process_alive(host, pid, tid + 1)
|
||||
assert not process_alive(host, free_pid, tid)
|
||||
|
||||
|
||||
def test_process_id():
|
||||
hostname, pid, tid = get_process_id()
|
||||
assert isinstance(hostname, str)
|
||||
assert isinstance(pid, int)
|
||||
assert isinstance(tid, int)
|
||||
assert len(hostname) > 0
|
||||
assert pid > 0
|
||||
assert get_process_id() == (hostname, pid, tid)
|
||||
|
|
Loading…
Reference in New Issue