(trunk libT) #2955 "verify pieces only when necessary, or when the user requests it." -- improvements to .resume file

As pointed out by longinus00 and ijuxda, storing per-piece timestamps in the .resume file can involve a lot of overhead. This commit reduces the overhead by adding a couple of optimizations: (1) in cases where *all* or *none* of a file's pieces were checked after the file's mtime, we can safely fold all the pieces' timestamps into a single per-file timestamp. (2) since unix time takes up a lot of space when rendered as a benc integer, find a common per-file "baseline" number, then store the pieces' timestamps as offsets from that number. Also add documentation explaining this new format and better explaining the pre-2.20 progress format.
This commit is contained in:
Jordan Lee 2011-02-02 21:17:16 +00:00
parent 5fc1ff8855
commit 06948a8514
3 changed files with 137 additions and 61 deletions

View File

@ -407,98 +407,172 @@ loadIdleLimits( tr_benc * dict,
***/
/* Save the torrent's progress into the resume dictionary `dict':
   a KEY_PROGRESS sub-dictionary holding the per-file/per-piece check
   timestamps, the "all" marker when the torrent is a seed, and the raw
   block bitfield. */
static void
saveProgress( tr_benc * dict,
const tr_torrent * tor )
saveProgress( tr_benc * dict, const tr_torrent * tor )
{
size_t i, n;
tr_benc * p;
tr_benc * m;
const tr_bitfield * bitfield;
tr_benc * l;
tr_benc * prog;
tr_file_index_t fi;
const struct tr_bitfield * bitfield;
const tr_info * inf = tr_torrentInfo( tor );
const time_t now = tr_time( );
p = tr_bencDictAdd( dict, KEY_PROGRESS );
tr_bencInitDict( p, 2 );
prog = tr_bencDictAddDict( dict, KEY_PROGRESS, 3 );
/* add each piece's timeChecked */
n = tor->info.pieceCount;
m = tr_bencDictAddList( p, KEY_PROGRESS_CHECKTIME, n );
for( i=0; i<n; ++i )
tr_bencListAddInt( m, tor->info.pieces[i].timeChecked );
/* add the file/piece check timestamps... */
l = tr_bencDictAddList( prog, KEY_PROGRESS_CHECKTIME, inf->fileCount );
for( fi=0; fi<inf->fileCount; ++fi )
{
const tr_piece * p;
const tr_piece * pend;
time_t oldest_nonzero = now;
time_t newest = 0;
tr_bool has_zero = FALSE;
const time_t mtime = tr_torrentGetFileMTime( tor, fi );
const tr_file * f = &inf->files[fi];
/* get the oldest and newest nonzero timestamps for pieces in this file */
/* NOTE(review): pend points at the file's last piece, so this loop stops
   before examining that piece; lastPiece looks like an inclusive index
   elsewhere in this file -- confirm whether the last piece is meant to
   be skipped here. */
for( p=&inf->pieces[f->firstPiece], pend=&inf->pieces[f->lastPiece]; p!=pend; ++p )
{
if( !p->timeChecked )
has_zero = TRUE;
else if( oldest_nonzero > p->timeChecked )
oldest_nonzero = p->timeChecked;
if( newest < p->timeChecked )
newest = p->timeChecked;
}
/* If some of a file's pieces have been checked more recently than
the file's mtime, and some less recently, then that file will
have a list containing timestamps for each piece.
However, the most common use case is that the file doesn't change
after it's downloaded. To reduce overhead in the .resume file,
only a single timestamp is saved for the file if *all* or *none*
of the pieces were tested more recently than the file's mtime. */
if( !has_zero && ( mtime <= oldest_nonzero ) ) /* all checked */
tr_bencListAddInt( l, oldest_nonzero );
else if( newest < mtime ) /* none checked */
tr_bencListAddInt( l, newest );
else { /* some are checked, some aren't... so list piece by piece */
/* The list's first entry is a per-file baseline; each checked piece is
   then stored as (timeChecked - baseline), which keeps the integers
   short. The baseline is one less than the oldest nonzero timestamp, so
   a checked piece is always stored as >= 1 and 0 still means
   "never checked". */
/* NOTE(review): a time_t is narrowed to int here -- confirm this cannot
   truncate on the supported platforms. */
const int offset = oldest_nonzero - 1;
tr_benc * ll = tr_bencListAddList( l, 2 + f->lastPiece - f->firstPiece );
tr_bencListAddInt( ll, offset );
/* NOTE(review): same p!=pend bound as the scan loop above, so one fewer
   piece entry is written than the list size reserved just above --
   confirm against the reader of this list. */
for( p=&inf->pieces[f->firstPiece], pend=&inf->pieces[f->lastPiece]; p!=pend; ++p )
tr_bencListAddInt( ll, p->timeChecked ? p->timeChecked - offset : 0 );
}
}
/* add the progress */
if( tor->completeness == TR_SEED )
tr_bencDictAddStr( p, KEY_PROGRESS_HAVE, "all" );
tr_bencDictAddStr( prog, KEY_PROGRESS_HAVE, "all" );
/* add the pieces bitfield */
bitfield = tr_cpBlockBitfield( &tor->completion );
tr_bencDictAddRaw( p, KEY_PROGRESS_BITFIELD,
bitfield->bits, bitfield->byteCount );
tr_bencDictAddRaw( prog, KEY_PROGRESS_BITFIELD, bitfield->bits,
bitfield->byteCount );
}
static uint64_t
loadProgress( tr_benc * dict,
tr_torrent * tor )
loadProgress( tr_benc * dict, tr_torrent * tor )
{
size_t i, n;
uint64_t ret = 0;
tr_benc * p;
size_t i, n;
uint64_t ret = 0;
tr_benc * prog;
const tr_info * inf = tr_torrentInfo( tor );
for( i=0, n=tor->info.pieceCount; i<n; ++i )
tor->info.pieces[i].timeChecked = 0;
for( i=0, n=inf->pieceCount; i<n; ++i )
inf->pieces[i].timeChecked = 0;
if( tr_bencDictFindDict( dict, KEY_PROGRESS, &p ) )
if( tr_bencDictFindDict( dict, KEY_PROGRESS, &prog ) )
{
const char * err;
const char * str;
const uint8_t * raw;
size_t rawlen;
tr_benc * m;
int64_t timeChecked;
size_t rawlen;
tr_benc * l;
if( tr_bencDictFindList( p, KEY_PROGRESS_CHECKTIME, &m ) )
if( tr_bencDictFindList( prog, KEY_PROGRESS_CHECKTIME, &l ) )
{
/* This key was added in 2.20.
Load in the timestamp of when we last checked each piece */
for( i=0, n=tor->info.pieceCount; i<n; ++i )
if( tr_bencGetInt( tr_bencListChild( m, i ), &timeChecked ) )
tor->info.pieces[i].timeChecked = (time_t)timeChecked;
}
else if( tr_bencDictFindList( p, KEY_PROGRESS_MTIMES, &m ) )
{
/* This is how it was done pre-2.20... per file. */
for( i=0, n=tr_bencListSize(m); i<n; ++i )
/* per-piece timestamps were added in 2.20.
If some of a file's pieces have been checked more recently than
the file's mtime, and some less recently, then that file will
have a list containing timestamps for each piece.
However, the most common use case is that the file doesn't change
after it's downloaded. To reduce overhead in the .resume file,
only a single timestamp is saved for the file if *all* or *none*
of the pieces were tested more recently than the file's mtime. */
tr_file_index_t fi;
for( fi=0; fi<inf->fileCount; ++fi )
{
/* get the timestamp of file #i */
if( tr_bencGetInt( tr_bencListChild( m, i ), &timeChecked ) )
tr_benc * b = tr_bencListChild( l, fi );
const tr_file * f = &inf->files[fi];
tr_piece * p = &inf->pieces[f->firstPiece];
const tr_piece * pend = &inf->pieces[f->lastPiece];
if( tr_bencIsInt( b ) )
{
/* walk through all the pieces that are in that file... */
tr_piece_index_t j;
tr_file * file = &tor->info.files[i];
for( j=file->firstPiece; j<=file->lastPiece; ++j )
int64_t t;
tr_bencGetInt( b, &t );
for( ; p!=pend; ++p )
p->timeChecked = (time_t)t;
}
else if( tr_bencIsList( b ) )
{
int i = 0;
int64_t offset = 0;
const int pieces = f->lastPiece + 1 - f->firstPiece;
tr_bencGetInt( tr_bencListChild( b, 0 ), &offset );
for( i=0; i<pieces; ++i )
{
tr_piece * piece = &tor->info.pieces[j];
/* If the piece's timestamp is unset from earlier,
* set it here. */
if( piece->timeChecked == 0 )
piece->timeChecked = timeChecked;
/* If the piece's timestamp is *newer* timeChecked,
* the piece probably spans more than one file.
* To be safe, let's use the older timestamp. */
if( piece->timeChecked > timeChecked )
piece->timeChecked = timeChecked;
int64_t t = 0;
tr_bencGetInt( tr_bencListChild( b, i+1 ), &t );
inf->pieces[f->firstPiece+i].timeChecked = (time_t)(t + offset);
}
}
}
}
else if( tr_bencDictFindList( prog, KEY_PROGRESS_MTIMES, &l ) )
{
tr_file_index_t fi;
/* Before 2.20, we stored the files' mtimes in the .resume file.
When loading the .resume file, a torrent's file would be flagged
as untested if its stored mtime didn't match its real mtime. */
for( fi=0; fi<inf->fileCount; ++fi )
{
int64_t t;
if( tr_bencGetInt( tr_bencListChild( l, fi ), &t ) )
{
const tr_file * f = &inf->files[fi];
tr_piece * p = &inf->pieces[f->firstPiece];
const tr_piece * pend = &inf->pieces[f->lastPiece];
const time_t mtime = tr_torrentGetFileMTime( tor, fi );
const time_t timeChecked = mtime==t ? mtime : 0;
for( ; p!=pend; ++p )
p->timeChecked = timeChecked;
}
}
}
err = NULL;
if( tr_bencDictFindStr( p, KEY_PROGRESS_HAVE, &str ) )
if( tr_bencDictFindStr( prog, KEY_PROGRESS_HAVE, &str ) )
{
if( !strcmp( str, "all" ) )
tr_cpSetHaveAll( &tor->completion );
else
err = "Invalid value for HAVE";
}
else if( tr_bencDictFindRaw( p, KEY_PROGRESS_BITFIELD, &raw, &rawlen ) )
else if( tr_bencDictFindRaw( prog, KEY_PROGRESS_BITFIELD, &raw, &rawlen ) )
{
tr_bitfield tmp;
tmp.byteCount = rawlen;

View File

@ -2388,8 +2388,8 @@ tr_torrentCheckPiece( tr_torrent * tor, tr_piece_index_t pieceIndex )
return pass;
}
static time_t
getFileMTime( const tr_torrent * tor, tr_file_index_t i )
time_t
tr_torrentGetFileMTime( const tr_torrent * tor, tr_file_index_t i )
{
struct stat sb;
time_t mtime = 0;
@ -2425,7 +2425,7 @@ tr_torrentPieceNeedsCheck( const tr_torrent * tor, tr_piece_index_t p )
tr_ioFindFileLocation( tor, p, 0, &f, &unused );
for( ; f < inf->fileCount && pieceHasFile( p, &inf->files[f] ); ++f )
if( tr_cpFileIsComplete( &tor->completion, f ) )
if( getFileMTime( tor, f ) > inf->pieces[p].timeChecked )
if( tr_torrentGetFileMTime( tor, f ) > inf->pieces[p].timeChecked )
return TRUE;
return FALSE;

View File

@ -429,6 +429,8 @@ tr_bool tr_torrentPieceNeedsCheck( const tr_torrent * tor, tr_piece_index_t piec
*/
tr_bool tr_torrentCheckPiece( tr_torrent * tor, tr_piece_index_t pieceIndex );
time_t tr_torrentGetFileMTime( const tr_torrent * tor, tr_file_index_t i );
uint64_t tr_torrentGetCurrentSizeOnDisk( const tr_torrent * tor );