mirror of
https://github.com/transmission/transmission
synced 2025-01-31 03:12:44 +00:00
(trunk libT) #1675: better utf8 validation of metainfo name, creator, and comment fields
This commit is contained in:
parent
d4c5e904b5
commit
92ee6566db
6 changed files with 159 additions and 9 deletions
|
@ -39,11 +39,12 @@
|
|||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
#ifdef CVTUTF_DEBUG
|
||||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include <string.h> /* strlen() */
|
||||
#include <unistd.h> /* ssize_t */
|
||||
#include "ConvertUTF.h"
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
|
@ -345,6 +346,56 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a variation of isLegalUTF8Sequence() that behaves like g_utf8_validate().
|
||||
* In addition to knowing if the sequence is legal, it also tells you the last good character.
|
||||
*/
|
||||
Boolean
|
||||
tr_utf8_validate( const char * str, ssize_t max_len, const char ** end )
|
||||
{
|
||||
const UTF8* source = (const UTF8*) str;
|
||||
const UTF8* sourceEnd = source;
|
||||
|
||||
if( max_len == 0 )
|
||||
return true;
|
||||
|
||||
if( str == NULL )
|
||||
return false;
|
||||
|
||||
sourceEnd = source + ((max_len < 0) ? strlen(str) : (size_t)max_len);
|
||||
|
||||
if( source == sourceEnd )
|
||||
{
|
||||
if( end != NULL )
|
||||
*end = (const char*) source;
|
||||
return true;
|
||||
}
|
||||
|
||||
for( ;; )
|
||||
{
|
||||
const int length = trailingBytesForUTF8[*source] + 1;
|
||||
if (source + length > sourceEnd) {
|
||||
if( end != NULL )
|
||||
*end = (const char*) source;
|
||||
return false;
|
||||
}
|
||||
if (!isLegalUTF8(source, length)) {
|
||||
if( end != NULL )
|
||||
*end = (const char*) source;
|
||||
return false;
|
||||
}
|
||||
source += length;
|
||||
if (source >= sourceEnd) {
|
||||
if( end != NULL )
|
||||
*end = (const char*) source;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#error only libtransmission should #include this header.
|
||||
#endif
|
||||
|
||||
#include <unistd.h> /* ssize_t */
|
||||
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
|
@ -149,6 +151,11 @@ ConversionResult ConvertUTF32toUTF16 (
|
|||
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||
|
||||
|
||||
/* intended to work the same as g_utf8_validate */
|
||||
Boolean tr_utf8_validate( const char * str, ssize_t max_len, const char ** end );
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -124,9 +124,9 @@ tr_metainfoMigrate( tr_session * session,
|
|||
***/
|
||||
|
||||
static int
|
||||
getfile( char ** setme,
|
||||
const char * root,
|
||||
tr_benc * path )
|
||||
getfile( char ** setme,
|
||||
const char * root,
|
||||
tr_benc * path )
|
||||
{
|
||||
int err;
|
||||
|
||||
|
@ -356,6 +356,7 @@ tr_metainfoParseImpl( const tr_session * session,
|
|||
const uint8_t * raw;
|
||||
tr_benc * beInfo;
|
||||
tr_benc * meta = (tr_benc *) meta_in;
|
||||
tr_bool err;
|
||||
|
||||
/* info_hash: urlencoded 20-byte SHA1 hash of the value of the info key
|
||||
* from the Metainfo file. Note that the value will be a bencoded
|
||||
|
@ -378,21 +379,21 @@ tr_metainfoParseImpl( const tr_session * session,
|
|||
if( !str || !*str )
|
||||
return "name";
|
||||
tr_free( inf->name );
|
||||
inf->name = tr_strdup( str );
|
||||
inf->name = tr_utf8clean( str, -1, &err );
|
||||
|
||||
/* comment */
|
||||
if( !tr_bencDictFindStr( meta, "comment.utf-8", &str ) )
|
||||
if( !tr_bencDictFindStr( meta, "comment", &str ) )
|
||||
str = "";
|
||||
tr_free( inf->comment );
|
||||
inf->comment = tr_strdup( str );
|
||||
inf->comment = tr_utf8clean( str, -1, &err );
|
||||
|
||||
/* created by */
|
||||
if( !tr_bencDictFindStr( meta, "created by.utf-8", &str ) )
|
||||
if( !tr_bencDictFindStr( meta, "created by", &str ) )
|
||||
str = "";
|
||||
tr_free( inf->creator );
|
||||
inf->creator = tr_strdup( str );
|
||||
inf->creator = tr_utf8clean( str, -1, &err );
|
||||
|
||||
/* creation date */
|
||||
if( !tr_bencDictFindInt( meta, "creation date", &i ) )
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#include <stdio.h> /* fprintf */
|
||||
#include <string.h> /* strcmp */
|
||||
#include "transmission.h"
|
||||
#include <unistd.h> /* ssize_t */
|
||||
#include "ConvertUTF.h" /* tr_utf8_validate*/
|
||||
#include "platform.h"
|
||||
#include "utils.h"
|
||||
#include "crypto.h"
|
||||
|
@ -135,6 +137,48 @@ test_buildpath( void )
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
test_utf8( void )
|
||||
{
|
||||
const char * in;
|
||||
char * out;
|
||||
tr_bool err;
|
||||
|
||||
in = "hello world";
|
||||
out = tr_utf8clean( in, -1, &err );
|
||||
check( err == FALSE )
|
||||
check( out != NULL )
|
||||
check( !strcmp( out, in ) )
|
||||
tr_free( out );
|
||||
|
||||
in = "hello world";
|
||||
out = tr_utf8clean( in, 5, &err );
|
||||
check( err == FALSE )
|
||||
check( out != NULL )
|
||||
check( !strcmp( out, "hello" ) )
|
||||
tr_free( out );
|
||||
|
||||
/* this version is not utf-8 */
|
||||
in = "Òðóäíî áûòü Áîãîì";
|
||||
out = tr_utf8clean( in, 17, &err );
|
||||
check( out != NULL )
|
||||
check( err != 0 )
|
||||
check( strlen( out ) == 17 )
|
||||
check( tr_utf8_validate( out, -1, NULL ) )
|
||||
tr_free( out );
|
||||
|
||||
/* same string, but utf-8 clean */
|
||||
in = "ÒðóäÃî áûòü Ã<>îãîì";
|
||||
out = tr_utf8clean( in, -1, &err );
|
||||
check( out != NULL )
|
||||
check( !err );
|
||||
check( tr_utf8_validate( out, -1, NULL ) )
|
||||
check ( !strcmp( in, out ) )
|
||||
tr_free( out );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main( void )
|
||||
{
|
||||
|
@ -160,6 +204,8 @@ main( void )
|
|||
return i;
|
||||
if( ( i = test_buildpath( ) ) )
|
||||
return i;
|
||||
if( ( i = test_utf8( ) ) )
|
||||
return i;
|
||||
|
||||
/* test that tr_cryptoRandInt() stays in-bounds */
|
||||
for( i = 0; i < 100000; ++i )
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#endif
|
||||
|
||||
#include "transmission.h"
|
||||
#include "ConvertUTF.h"
|
||||
#include "list.h"
|
||||
#include "utils.h"
|
||||
#include "platform.h"
|
||||
|
@ -1319,3 +1320,42 @@ tr_lowerBound( const void * key,
|
|||
|
||||
return first;
|
||||
}
|
||||
|
||||
/***
|
||||
****
|
||||
***/
|
||||
|
||||
char*
|
||||
tr_utf8clean( const char * str, ssize_t max_len, tr_bool * err )
|
||||
{
|
||||
const char zero = '\0';
|
||||
char * ret;
|
||||
struct evbuffer * buf = evbuffer_new( );
|
||||
const char * end;
|
||||
|
||||
if( err != NULL )
|
||||
*err = FALSE;
|
||||
|
||||
if( max_len < 0 )
|
||||
max_len = (ssize_t) strlen( str );
|
||||
|
||||
while( !tr_utf8_validate ( str, max_len, &end ) )
|
||||
{
|
||||
const ssize_t good_len = end - str;
|
||||
|
||||
evbuffer_add( buf, str, good_len );
|
||||
max_len -= ( good_len + 1 );
|
||||
str += ( good_len + 1 );
|
||||
evbuffer_add( buf, "?", 1 );
|
||||
|
||||
if( err != NULL )
|
||||
*err = TRUE;
|
||||
}
|
||||
|
||||
evbuffer_add( buf, str, max_len );
|
||||
evbuffer_add( buf, &zero, 1 );
|
||||
ret = tr_memdup( EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
|
||||
assert( tr_utf8_validate( ret, -1, NULL ) );
|
||||
evbuffer_free( buf );
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -237,6 +237,11 @@ uint64_t tr_date( void );
|
|||
/* wait the specified number of milliseconds */
|
||||
void tr_wait( uint64_t delay_milliseconds );
|
||||
|
||||
char* tr_utf8clean( const char * str,
|
||||
ssize_t max_len,
|
||||
tr_bool * err );
|
||||
|
||||
|
||||
/***
|
||||
****
|
||||
***/
|
||||
|
|
Loading…
Reference in a new issue