transmission/libtransmission/fdlimit.c

758 lines
17 KiB
C

/*
* This file Copyright (C) Mnemosyne LLC
*
* This file is licensed by the GPL version 2. Works owned by the
* Transmission project are granted a special exemption to clause 2 (b)
* so that the bulk of its code can remain under the MIT license.
* This exemption does not extend to derived works not owned by
* the Transmission project.
*
* $Id$
*/
#ifdef HAVE_POSIX_FADVISE
#ifdef _XOPEN_SOURCE
#undef _XOPEN_SOURCE
#endif
#define _XOPEN_SOURCE 600
#endif
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <string.h>
#ifdef SYS_DARWIN
#include <fcntl.h>
#endif
#ifdef HAVE_FALLOCATE64
/* FIXME can't find the right #include voodoo to pick up the declaration.. */
extern int fallocate64 (int fd, int mode, uint64_t offset, uint64_t len);
#endif
#ifdef HAVE_XFS_XFS_H
#include <xfs/xfs.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h> /* getrlimit */
#include <sys/resource.h> /* getrlimit */
#include <fcntl.h> /* O_LARGEFILE posix_fadvise */
#include <unistd.h> /* lseek (), write (), ftruncate (), pread (), pwrite (), etc */
#include "transmission.h"
#include "fdlimit.h"
#include "net.h"
#include "session.h"
#include "torrent.h" /* tr_isTorrent () */
/* Emit a deep-log message tagged with this file's name and the current line.
 * The arguments are forwarded to tr_deepLog () only when
 * tr_deepLoggingIsActive () returns nonzero.
 * The do { } while (0) wrapper makes the macro usable as a single statement. */
#define dbgmsg(...) \
do { \
if (tr_deepLoggingIsActive ()) \
tr_deepLog (__FILE__, __LINE__, NULL, __VA_ARGS__); \
} while (0)
/***
****
**** Local Files
****
***/
/* Fallbacks for open () flags that the included headers may not define.
 * Each missing flag is defined as 0 so that it can be OR'ed into a flag
 * set unconditionally without changing the resulting value. */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifndef O_SEQUENTIAL
#define O_SEQUENTIAL 0
#endif
/**
 * Extend the file behind `fd` to `length` bytes without necessarily
 * writing its contents.
 *
 * A zero `length` is treated as an immediate success. Otherwise
 * fallocate64 () is tried first when it is available; if it is missing
 * or fails, the function seeks to the last byte, writes a single NUL
 * there, and then calls ftruncate () with `length`.
 *
 * @return true on success, false if every attempted method failed.
 */
static bool
preallocate_file_sparse (int fd, uint64_t length)
{
  const char nul_byte = '\0';

  /* nothing to reserve for an empty file */
  if (length == 0)
    return true;

#ifdef HAVE_FALLOCATE64
  /* fallocate64 is always preferred, so try it first */
  if (fallocate64 (fd, 0, 0, length) == 0)
    return true;
#endif

  /* fallback: the old-style seek-and-write */
  if (lseek (fd, length-1, SEEK_SET) == -1)
    return false;

  if (write (fd, &nul_byte, 1) == -1)
    return false;

  return ftruncate (fd, length) != -1;
}
/**
 * Create `filename` and try to reserve `length` bytes of storage for it.
 *
 * On WIN32 the file is created with CREATE_NEW and its end-of-file marker
 * is moved to `length`. On other platforms the file is opened read-write
 * (and created if needed); every allocation method that was compiled in is
 * then tried in turn until one reports success. If none does, `length`
 * bytes of zeros are written to the file in 4096-byte passes.
 *
 * The descriptor / handle opened here is always closed before returning.
 *
 * @return true if one of the methods succeeded; false if the file could not
 *         be created/opened or every method failed.
 */
static bool
preallocate_file_full (const char * filename, uint64_t length)
{
bool success = 0;
#ifdef WIN32
HANDLE hFile = CreateFile (filename, GENERIC_WRITE, 0, 0, CREATE_NEW, FILE_FLAG_RANDOM_ACCESS, 0);
if (hFile != INVALID_HANDLE_VALUE)
{
LARGE_INTEGER li;
li.QuadPart = length;
/* move the file pointer to `length`, then make that the end of the file */
success = SetFilePointerEx (hFile, li, NULL, FILE_BEGIN) && SetEndOfFile (hFile);
CloseHandle (hFile);
}
#else
int flags = O_RDWR | O_CREAT | O_LARGEFILE;
int fd = open (filename, flags, 0666);
if (fd >= 0)
{
/* each step below only runs if no earlier step has succeeded;
   a zero return from the allocation call is treated as success */
# ifdef HAVE_FALLOCATE64
if (!success)
success = !fallocate64 (fd, 0, 0, length);
# endif
# ifdef HAVE_XFS_XFS_H
/* only attempted when platform_test_xfs_fd () accepts the descriptor */
if (!success && platform_test_xfs_fd (fd))
{
xfs_flock64_t fl;
fl.l_whence = 0;
fl.l_start = 0;
fl.l_len = length;
success = !xfsctl (NULL, fd, XFS_IOC_RESVSP64, &fl);
}
# endif
# ifdef SYS_DARWIN
if (!success)
{
fstore_t fst;
fst.fst_flags = F_ALLOCATECONTIG;
fst.fst_posmode = F_PEOFPOSMODE;
fst.fst_offset = 0;
fst.fst_length = length;
fst.fst_bytesalloc = 0;
success = !fcntl (fd, F_PREALLOCATE, &fst);
}
# endif
# ifdef HAVE_POSIX_FALLOCATE
if (!success)
success = !posix_fallocate (fd, 0, length);
# endif
if (!success) /* if nothing else works, do it the old-fashioned way */
{
uint8_t buf[ 4096 ];
memset (buf, 0, sizeof (buf));
success = true;
/* write zeros until `length` bytes are out or a write comes up short */
while (success && (length > 0))
{
const int thisPass = MIN (length, sizeof (buf));
success = write (fd, buf, thisPass) == thisPass;
length -= thisPass;
}
}
close (fd);
}
#endif
return success;
}
/**
 * Portability wrapper for fsync ().
 *
 * WIN32 builds call _commit (); all other builds call fsync ().
 *
 * @return whatever the underlying call returned for `fd`.
 */
int
tr_fsync (int fd)
{
  int rc;

#ifdef WIN32
  rc = _commit (fd);
#else
  rc = fsync (fd);
#endif

  return rc;
}
/* Like pread and pwrite, except that the position is undefined afterwards.
And of course they are not thread-safe. */
/* don't use pread/pwrite on old versions of uClibc because they're buggy.
* https://trac.transmissionbt.com/ticket/3826 */
#ifdef __UCLIBC__
/* Evaluates to nonzero when the uClibc being compiled against is
 * version major.minor.micro or newer. */
#define TR_UCLIBC_CHECK_VERSION(major,minor,micro) \
(__UCLIBC_MAJOR__ > (major) || \
(__UCLIBC_MAJOR__ == (major) && __UCLIBC_MINOR__ > (minor)) || \
(__UCLIBC_MAJOR__ == (major) && __UCLIBC_MINOR__ == (minor) && \
__UCLIBC_SUBLEVEL__ >= (micro)))
/* uClibc older than 0.9.28: drop the feature macros so that tr_pread ()
 * and tr_pwrite () use their lseek ()-based fallbacks instead */
#if !TR_UCLIBC_CHECK_VERSION (0,9,28)
#undef HAVE_PREAD
#undef HAVE_PWRITE
#endif
#endif
/* Darwin builds always use the native pread () / pwrite () */
#ifdef SYS_DARWIN
#define HAVE_PREAD
#define HAVE_PWRITE
#endif
/**
 * Read up to `count` bytes from `fd` at byte position `offset` into `buf`.
 *
 * Uses pread () when HAVE_PREAD is defined. Otherwise it seeks to `offset`
 * and calls read (), which moves the descriptor's file position.
 *
 * @return the result of pread () / read (), or -1 if the seek failed.
 */
ssize_t
tr_pread (int fd, void *buf, size_t count, off_t offset)
{
#ifdef HAVE_PREAD
  return pread (fd, buf, count, offset);
#else
  /* emulate with seek + read; a failed seek means nothing is read */
  return (lseek (fd, offset, SEEK_SET) < 0) ? -1
                                            : read (fd, buf, count);
#endif
}
/**
 * Write up to `count` bytes from `buf` to `fd` at byte position `offset`.
 *
 * Uses pwrite () when HAVE_PWRITE is defined. Otherwise it seeks to
 * `offset` and calls write (), which moves the descriptor's file position.
 *
 * @return the result of pwrite () / write (), or -1 if the seek failed.
 */
ssize_t
tr_pwrite (int fd, const void *buf, size_t count, off_t offset)
{
#ifdef HAVE_PWRITE
  return pwrite (fd, buf, count, offset);
#else
  /* emulate with seek + write; a failed seek means nothing is written */
  return (lseek (fd, offset, SEEK_SET) < 0) ? -1
                                            : write (fd, buf, count);
#endif
}
/**
 * Hint that `count` bytes starting at `offset` of `fd` will be read soon.
 *
 * Uses posix_fadvise (POSIX_FADV_WILLNEED) when available, otherwise
 * fcntl (F_RDADVISE) on Darwin. When neither is compiled in, the call
 * does nothing.
 *
 * @return the result of the underlying call, or 0 when no hint mechanism
 *         is compiled in.
 */
int
tr_prefetch (int fd UNUSED, off_t offset UNUSED, size_t count UNUSED)
{
  int rc = 0;

#ifdef HAVE_POSIX_FADVISE
  rc = posix_fadvise (fd, offset, count, POSIX_FADV_WILLNEED);
#elif defined (SYS_DARWIN)
  struct radvisory advice;
  advice.ra_offset = offset;
  advice.ra_count = count;
  rc = fcntl (fd, F_RDADVISE, &advice);
#endif

  return rc;
}
/**
 * Apply read-ahead / caching hints for a file that will be read through
 * once, front to back.
 *
 * Does nothing when `fd` is negative. The hint calls are allowed to fail
 * silently, so errno is saved beforehand and restored afterwards.
 */
void
tr_set_file_for_single_pass (int fd)
{
  int saved_errno;

  if (fd < 0)
    return;

  /* the hints below must not leak their failures into errno */
  saved_errno = errno;

#ifdef HAVE_POSIX_FADVISE
  posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif
#ifdef SYS_DARWIN
  fcntl (fd, F_RDAHEAD, 1);
  fcntl (fd, F_NOCACHE, 1);
#endif

  errno = saved_errno;
}
/**
 * open () `filename` with `flags` (creation mode 0666) and apply the
 * single-pass hints to the resulting descriptor.
 *
 * @return the descriptor returned by open (), which is -1 on failure.
 */
static int
open_local_file (const char * filename, int flags)
{
  int fd;

  fd = open (filename, flags, 0666);
  tr_set_file_for_single_pass (fd);

  return fd;
}
/**
 * Open `filename` write-only, creating it if it does not exist.
 *
 * @return the descriptor from open_local_file ().
 */
int
tr_open_file_for_writing (const char * filename)
{
  const int flags = O_WRONLY | O_CREAT | O_BINARY | O_LARGEFILE;

  return open_local_file (filename, flags);
}
/**
 * Open `filename` read-only with the sequential-access flag set.
 *
 * @return the descriptor from open_local_file ().
 */
int
tr_open_file_for_scanning (const char * filename)
{
  const int flags = O_RDONLY | O_SEQUENTIAL | O_BINARY | O_LARGEFILE;

  return open_local_file (filename, flags);
}
/**
 * Close `fd`, first hinting to the OS (where supported) that the file's
 * cached data is no longer needed.
 *
 * The posix_fadvise () hint is wrapped in an errno save/restore; the
 * return value of close () is not inspected.
 */
void
tr_close_file (int fd)
{
#if defined (HAVE_POSIX_FADVISE)
  /* The "don't cache this" hint may fail silently;
     keep any such failure out of errno */
  const int saved_errno = errno;
  posix_fadvise (fd, 0, 0, POSIX_FADV_DONTNEED);
  errno = saved_errno;
#endif

#ifdef SYS_DARWIN
  /* it's unclear from the man pages whether this actually flushes out
   * the cache, but it couldn't hurt... */
  fcntl (fd, F_NOCACHE, 1);
#endif

  close (fd);
}
/*****
******
******
******
*****/
/* One slot of the open-file cache. A slot is "open" when fd >= 0. */
struct tr_cached_file
{
/* whether the file was opened for writing; recorded by tr_fdFileCheckout () */
bool is_writable;
/* the open descriptor, or -1 when the slot is unused */
int fd;
/* torrent_id and file_index together form the key used by fileset_lookup () */
int torrent_id;
tr_file_index_t file_index;
/* tr_time () of the most recent use; the smallest value is recycled first */
time_t used_at;
};
/**
 * Tell whether cache slot `o` currently holds an open descriptor.
 * `o` must not be NULL (asserted).
 */
static inline bool
cached_file_is_open (const struct tr_cached_file * o)
{
  assert (o != NULL);

  /* closed slots carry fd == -1 */
  return !(o->fd < 0);
}
/**
 * Close the descriptor held by cache slot `o` and mark the slot unused.
 * The slot must be open (asserted).
 */
static void
cached_file_close (struct tr_cached_file * o)
{
  int fd;

  assert (cached_file_is_open (o));

  /* detach the descriptor from the slot, then close it */
  fd = o->fd;
  o->fd = -1;
  tr_close_file (fd);
}
/**
 * Opens `filename` and stores the resulting descriptor in `o->fd`.
 *
 * When `writable` is set, missing parent folders are created first and, if
 * `filename` was not already a regular file, it is preallocated according
 * to `allocation`. A pre-existing regular file that is larger than
 * `file_size` is truncated down to `file_size`.
 *
 * returns 0 on success, or an errno value on failure.
 * errno values include ENOENT if the parent folder doesn't exist,
 * plus the errno values set by tr_mkdirp (), open () and ftruncate ().
 *
 * On failure this function never leaves a descriptor that it opened in
 * `o->fd`: the descriptor is closed and `o->fd` is reset to -1.
 */
static int
cached_file_open (struct tr_cached_file  * o,
                  const char             * filename,
                  bool                     writable,
                  tr_preallocation_mode    allocation,
                  uint64_t                 file_size)
{
  int flags;
  struct stat sb;
  bool alreadyExisted;

  /* create subfolders, if any */
  if (writable)
    {
      char * dir = tr_dirname (filename);
      const int err = tr_mkdirp (dir, 0777) ? errno : 0;
      if (err)
        {
          tr_err (_("Couldn't create \"%1$s\": %2$s"), dir, tr_strerror (err));
          tr_free (dir);
          return err;
        }
      tr_free (dir);
    }

  /* `sb` is only meaningful when alreadyExisted is true */
  alreadyExisted = !stat (filename, &sb) && S_ISREG (sb.st_mode);

  if (writable && !alreadyExisted && (allocation == TR_PREALLOCATE_FULL))
    if (preallocate_file_full (filename, file_size))
      tr_dbg ("Preallocated file \"%s\"", filename);

  /* open the file */
  flags = writable ? (O_RDWR | O_CREAT) : O_RDONLY;
  flags |= O_LARGEFILE | O_BINARY | O_SEQUENTIAL;
  o->fd = open (filename, flags, 0666);

  if (o->fd == -1)
    {
      const int err = errno;
      tr_err (_("Couldn't open \"%1$s\": %2$s"), filename, tr_strerror (err));
      return err;
    }

  /* If the file already exists and it's too large, truncate it.
   * This is a fringe case that happens if a torrent's been updated
   * and one of the updated torrent's files is smaller.
   * http://trac.transmissionbt.com/ticket/2228
   * https://bugs.launchpad.net/ubuntu/+source/transmission/+bug/318249
   */
  if (alreadyExisted && (file_size < (uint64_t)sb.st_size))
    {
      if (ftruncate (o->fd, file_size) == -1)
        {
          const int err = errno;
          tr_err (_("Couldn't truncate \"%1$s\": %2$s"), filename, tr_strerror (err));

          /* Don't hand back an error while the slot still holds an open
           * descriptor: it would leak, and the slot could later be matched
           * by a lookup for whatever file it was previously keyed to. */
          cached_file_close (o);
          return err;
        }
    }

  if (writable && !alreadyExisted && (allocation == TR_PREALLOCATE_SPARSE))
    preallocate_file_sparse (o->fd, file_size);

  /* Many (most?) clients request blocks in ascending order,
   * so increase the readahead buffer.
   * Also, disable OS-level caching because "inactive memory" angers users. */
  tr_set_file_for_single_pass (o->fd);

  return 0;
}
/***
****
***/
/* A fixed-size array of cache slots, described as a half-open range. */
struct tr_fileset
{
/* first slot; NULL when no array is allocated */
struct tr_cached_file * begin;
/* one past the last slot (begin + slot count) */
const struct tr_cached_file * end;
};
static void
fileset_construct (struct tr_fileset * set, int n)
{
struct tr_cached_file * o;
const struct tr_cached_file TR_CACHED_FILE_INIT = { 0, -1, 0, 0, 0 };
set->begin = tr_new (struct tr_cached_file, n);
set->end = set->begin + n;
for (o=set->begin; o!=set->end; ++o)
*o = TR_CACHED_FILE_INIT;
}
/**
 * Close every open slot in `set`. A NULL `set` is ignored.
 */
static void
fileset_close_all (struct tr_fileset * set)
{
  struct tr_cached_file * slot;

  if (set == NULL)
    return;

  for (slot=set->begin; slot!=set->end; ++slot)
    {
      if (!cached_file_is_open (slot))
        continue;

      cached_file_close (slot);
    }
}
/**
 * Close every open slot in `set`, free the slot array, and reset the
 * range pointers to NULL.
 */
static void
fileset_destruct (struct tr_fileset * set)
{
  fileset_close_all (set);

  tr_free (set->begin);
  set->begin = NULL;
  set->end = NULL;
}
/**
 * Close every open slot in `set` whose torrent_id equals `torrent_id`.
 * A NULL `set` is ignored.
 */
static void
fileset_close_torrent (struct tr_fileset * set, int torrent_id)
{
  struct tr_cached_file * slot;

  if (set == NULL)
    return;

  for (slot=set->begin; slot!=set->end; ++slot)
    {
      if (slot->torrent_id != torrent_id)
        continue;

      if (cached_file_is_open (slot))
        cached_file_close (slot);
    }
}
/**
 * Find the open slot in `set` keyed by (`torrent_id`, `i`).
 *
 * @return the first matching open slot, or NULL if there is none or if
 *         `set` is NULL.
 */
static struct tr_cached_file *
fileset_lookup (struct tr_fileset * set, int torrent_id, tr_file_index_t i)
{
  struct tr_cached_file * slot;

  if (set == NULL)
    return NULL;

  for (slot=set->begin; slot!=set->end; ++slot)
    {
      if (torrent_id != slot->torrent_id)
        continue;
      if (i != slot->file_index)
        continue;
      if (cached_file_is_open (slot))
        return slot;
    }

  return NULL;
}
/**
 * Return a closed slot from `set` that the caller may open.
 *
 * The first unused slot is preferred. If every slot is open, the one with
 * the smallest used_at is closed and returned instead.
 *
 * @return a closed slot, or NULL when `set` has no slot array.
 */
static struct tr_cached_file *
fileset_get_empty_slot (struct tr_fileset * set)
{
  struct tr_cached_file * slot;
  struct tr_cached_file * oldest = NULL;

  if (set->begin == NULL)
    return NULL;

  /* try to find an unused slot */
  for (slot=set->begin; slot!=set->end; ++slot)
    if (!cached_file_is_open (slot))
      return slot;

  /* all slots are full... recycle the least recently used */
  for (slot=set->begin; slot!=set->end; ++slot)
    if ((oldest == NULL) || (slot->used_at < oldest->used_at))
      oldest = slot;

  cached_file_close (oldest);
  return oldest;
}
/***
****
**** Startup / Shutdown
****
***/
/* Per-session descriptor bookkeeping, stored in session->fdInfo. */
struct tr_fdInfo
{
/* number of sockets currently counted against session->peerLimit;
   incremented by tr_fdSocketCreate () / tr_fdSocketAccept () and
   decremented by tr_fdSocketClose () */
int peerCount;
/* the cache of open local files */
struct tr_fileset fileset;
};
/**
 * Lazily create `session->fdInfo`.
 *
 * On first use this allocates the bookkeeping struct with a 32-slot file
 * cache, and then tries to set the process's RLIMIT_NOFILE soft limit to
 * MIN (hard limit, FD_SETSIZE). Later calls are no-ops.
 */
static void
ensureSessionFdInfoExists (tr_session * session)
{
  struct rlimit limit;
  struct tr_fdInfo * info;
  const int FILE_CACHE_SIZE = 32;

  assert (tr_isSession (session));

  if (session->fdInfo != NULL)
    return;

  /* Create the local file cache */
  info = tr_new0 (struct tr_fdInfo, 1);
  fileset_construct (&info->fileset, FILE_CACHE_SIZE);
  session->fdInfo = info;

  /* set the open-file limit to the largest safe size wrt FD_SETSIZE */
  if (getrlimit (RLIMIT_NOFILE, &limit) == 0)
    {
      const int old_limit = (int) limit.rlim_cur;
      const int new_limit = MIN (limit.rlim_max, FD_SETSIZE);

      if (new_limit != old_limit)
        {
          limit.rlim_cur = new_limit;
          setrlimit (RLIMIT_NOFILE, &limit);

          /* re-read the limit so the log shows what is actually in effect */
          getrlimit (RLIMIT_NOFILE, &limit);
          tr_inf ("Changed open file limit from %d to %d", old_limit, (int)limit.rlim_cur);
        }
    }
}
/**
 * Tear down the session's descriptor bookkeeping: close all cached files,
 * free the cache and the tr_fdInfo, and clear `session->fdInfo`.
 * Does nothing if `session` is NULL or has no fdInfo.
 */
void
tr_fdClose (tr_session * session)
{
  struct tr_fdInfo * info;

  if ((session == NULL) || (session->fdInfo == NULL))
    return;

  info = session->fdInfo;
  fileset_destruct (&info->fileset);
  tr_free (info);
  session->fdInfo = NULL;
}
/***
****
***/
/**
 * Return the file cache belonging to `session`, creating the session's
 * fdInfo first if it doesn't exist yet.
 *
 * @return the session's fileset, or NULL when `session` is NULL.
 */
static struct tr_fileset*
get_fileset (tr_session * session)
{
  struct tr_fileset * set = NULL;

  if (session != NULL)
    {
      ensureSessionFdInfoExists (session);
      set = &session->fdInfo->fileset;
    }

  return set;
}
/**
 * Close the cached descriptor, if any, for file `i` of torrent `tor`.
 * Writable files are fsync'ed before being closed.
 */
void
tr_fdFileClose (tr_session * s, const tr_torrent * tor, tr_file_index_t i)
{
  struct tr_cached_file * o = fileset_lookup (get_fileset (s), tr_torrentId (tor), i);

  if (o == NULL)
    return;

  /* flush writable files so that their mtimes will be
   * up-to-date when this function returns to the caller... */
  if (o->is_writable)
    tr_fsync (o->fd);

  cached_file_close (o);
}
/**
 * Return the already-open descriptor for file `i` of torrent `torrent_id`
 * without opening anything.
 *
 * On a hit the slot's used_at timestamp is refreshed.
 *
 * @return the cached fd, or -1 if the file isn't cached or if `writable`
 *         is requested but the cached descriptor is not writable.
 */
int
tr_fdFileGetCached (tr_session * s, int torrent_id, tr_file_index_t i, bool writable)
{
  struct tr_cached_file * o = fileset_lookup (get_fileset (s), torrent_id, i);

  if (o == NULL)
    return -1;

  if (writable && !o->is_writable)
    return -1;

  o->used_at = tr_time ();
  return o->fd;
}
/* Yields the modification-time field of a struct stat `sb`.
 * Darwin builds read it from st_mtimespec.tv_sec; all other builds
 * read st_mtime. */
#ifdef SYS_DARWIN
#define TR_STAT_MTIME(sb)((sb).st_mtimespec.tv_sec)
#else
#define TR_STAT_MTIME(sb)((sb).st_mtime)
#endif
/**
 * Look up the cached descriptor for file `i` of torrent `torrent_id` and
 * fstat () it to obtain the file's modification time.
 *
 * @param mtime receives the modification time; written only on success.
 * @return true if the file is cached and fstat () succeeded.
 */
bool
tr_fdFileGetCachedMTime (tr_session * s, int torrent_id, tr_file_index_t i, time_t * mtime)
{
  struct stat sb;
  struct tr_cached_file * o = fileset_lookup (get_fileset (s), torrent_id, i);

  if (o == NULL)
    return false;

  if (fstat (o->fd, &sb) != 0)
    return false;

  *mtime = TR_STAT_MTIME (sb);
  return true;
}
/**
 * Close every cached file descriptor that belongs to torrent `torrent_id`.
 */
void
tr_fdTorrentClose (tr_session * session, int torrent_id)
{
  struct tr_fileset * set = get_fileset (session);

  fileset_close_torrent (set, torrent_id);
}
/**
 * Return an open descriptor for file `i` of torrent `torrent_id`, opening
 * `filename` if it isn't already in the session's file cache.
 *
 * A descriptor that is cached read-only is closed and reopened when
 * `writable` is requested. When the file is not cached, a free slot is
 * taken (recycling the least recently used one if necessary) and the file
 * is opened via cached_file_open () with the given `allocation` mode and
 * `file_size`.
 *
 * returns an fd on success, or a -1 on failure and sets errno.
 * errno is EINVAL when `session` is NULL, EMFILE when the cache has no
 * slot to offer, and otherwise whatever cached_file_open () reported.
 */
int
tr_fdFileCheckout (tr_session             * session,
                   int                      torrent_id,
                   tr_file_index_t          i,
                   const char             * filename,
                   bool                     writable,
                   tr_preallocation_mode    allocation,
                   uint64_t                 file_size)
{
  struct tr_fileset * set = get_fileset (session);
  struct tr_cached_file * o = fileset_lookup (set, torrent_id, i);

  if (o && writable && !o->is_writable)
    {
      cached_file_close (o); /* close it so we can reopen in rw mode */
    }
  else if (!o)
    {
      /* get_fileset () yields NULL for a NULL session */
      if (set == NULL)
        {
          errno = EINVAL;
          return -1;
        }

      /* fileset_get_empty_slot () yields NULL when there is no slot array */
      o = fileset_get_empty_slot (set);
      if (o == NULL)
        {
          errno = EMFILE;
          return -1;
        }
    }

  if (!cached_file_is_open (o))
    {
      const int err = cached_file_open (o, filename, writable, allocation, file_size);
      if (err)
        {
          errno = err;
          return -1;
        }

      dbgmsg ("opened '%s' writable %c", filename, writable?'y':'n');
      o->is_writable = writable;
    }

  dbgmsg ("checking out '%s'", filename);

  /* (re)key the slot and mark it as just used */
  o->torrent_id = torrent_id;
  o->file_index = i;
  o->used_at = tr_time ();
  return o->fd;
}
/***
****
**** Sockets
****
***/
/**
 * Create a socket of the given `domain` and `type`, counting it against
 * the session's peer limit.
 *
 * No socket is created when the session's peerCount has already reached
 * session->peerLimit. A socket () failure is logged unless the error is
 * EAFNOSUPPORT. The first time a socket is successfully created, its
 * SO_SNDBUF / SO_RCVBUF sizes are written to the debug log.
 *
 * @return the new socket, or -1 if the limit was reached or socket () failed.
 */
int
tr_fdSocketCreate (tr_session * session, int domain, int type)
{
  int s = -1;
  struct tr_fdInfo * gFd;

  assert (tr_isSession (session));

  ensureSessionFdInfoExists (session);
  gFd = session->fdInfo;

  if (gFd->peerCount < session->peerLimit)
    {
      s = socket (domain, type, 0);
      if ((s < 0) && (sockerrno != EAFNOSUPPORT))
        tr_err (_("Couldn't create socket: %s"), tr_strerror (sockerrno));
    }

  if (s > -1)
    ++gFd->peerCount;

  assert (gFd->peerCount >= 0);

  if (s >= 0)
    {
      static bool buf_logged = false;

      if (!buf_logged)
        {
          int i = 0;
          socklen_t size = sizeof (i);

          buf_logged = true;

          /* only log a value that getsockopt () actually filled in */
          if (!getsockopt (s, SOL_SOCKET, SO_SNDBUF, &i, &size))
            tr_dbg ("SO_SNDBUF size is %d", i);

          /* `size` is an in/out argument, so reset it before reusing it */
          size = sizeof (i);
          if (!getsockopt (s, SOL_SOCKET, SO_RCVBUF, &i, &size))
            tr_dbg ("SO_RCVBUF size is %d", i);
        }
    }

  return s;
}
/**
 * Accept a pending connection on the listening socket `sockfd`.
 *
 * The connection is kept only if the session is still below its peer
 * limit and the peer's address can be converted by
 * tr_address_from_sockaddr_storage (); otherwise the accepted socket is
 * closed again.
 *
 * @param addr receives the peer's address (must not be NULL)
 * @param port receives the peer's port (must not be NULL)
 * @return the accepted socket, or -1 if accept () failed or the connection
 *         was rejected.
 */
int
tr_fdSocketAccept (tr_session * s, int sockfd, tr_address * addr, tr_port * port)
{
  int fd;
  socklen_t len; /* accept () takes a socklen_t *, not an unsigned int * */
  struct tr_fdInfo * gFd;
  struct sockaddr_storage sock;

  assert (tr_isSession (s));
  assert (addr);
  assert (port);

  ensureSessionFdInfoExists (s);
  gFd = s->fdInfo;

  len = sizeof (struct sockaddr_storage);
  fd = accept (sockfd, (struct sockaddr *) &sock, &len);

  if (fd >= 0)
    {
      if ((gFd->peerCount < s->peerLimit)
          && tr_address_from_sockaddr_storage (addr, port, &sock))
        {
          ++gFd->peerCount;
        }
      else
        {
          /* over the limit, or an address we can't use: drop the connection */
          tr_netCloseSocket (fd);
          fd = -1;
        }
    }

  return fd;
}
/**
 * Close the socket `fd` and release its slot in the session's peer count.
 *
 * Does nothing when the session has no fdInfo. A negative `fd` is not
 * closed and does not change the count.
 */
void
tr_fdSocketClose (tr_session * session, int fd)
{
  struct tr_fdInfo * gFd;

  assert (tr_isSession (session));

  gFd = session->fdInfo;
  if (gFd == NULL)
    return;

  if (fd >= 0)
    {
      tr_netCloseSocket (fd);
      --gFd->peerCount;
    }

  assert (gFd->peerCount >= 0);
}