1
0
Fork 0
mirror of https://github.com/transmission/transmission synced 2025-01-31 03:12:44 +00:00

(trunk libT) #1675: better utf8 validation of metainfo name, creator, and comment fields

This commit is contained in:
Charles Kerr 2009-01-10 22:48:58 +00:00
parent d4c5e904b5
commit 92ee6566db
6 changed files with 159 additions and 9 deletions

View file

@ -39,11 +39,12 @@
------------------------------------------------------------------------ */
#include "ConvertUTF.h"
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#include <stdio.h>
#endif
#include <string.h> /* strlen() */
#include <unistd.h> /* ssize_t */
#include "ConvertUTF.h"
static const int halfShift = 10; /* used for shifting by 10 bits */
@ -345,6 +346,56 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
}
}
/**
 * This is a variation of isLegalUTF8Sequence() that behaves like g_utf8_validate().
 * In addition to knowing if the sequence is legal, it also tells you the last good character.
 *
 * @param str     bytes to validate; may be NULL
 * @param max_len number of bytes to examine, or a negative value to examine
 *                up to (not including) the terminating NUL found by strlen()
 * @param end     if non-NULL, always receives a position inside str: the start
 *                of the first sequence that is illegal or runs past the end of
 *                the examined range, or the end of that range when every
 *                sequence is legal. It receives str itself when nothing was
 *                examined (max_len == 0 or str == NULL).
 * @return true when max_len is 0 or every sequence in the range is legal;
 *         false when str is NULL (and max_len is nonzero) or a bad sequence
 *         was found.
 */
Boolean
tr_utf8_validate( const char * str, ssize_t max_len, const char ** end )
{
    const UTF8 * source = (const UTF8*) str;
    const UTF8 * sourceEnd;

    /* make sure callers never read an indeterminate *end,
     * even on the early-return paths below */
    if( end != NULL )
        *end = str;

    if( max_len == 0 )
        return true;

    if( str == NULL )
        return false;

    sourceEnd = source + ( ( max_len < 0 ) ? strlen( str ) : (size_t)max_len );

    while( source < sourceEnd )
    {
        const int length = trailingBytesForUTF8[*source] + 1;

        /* compare against the bytes remaining rather than forming
         * `source + length', which could point beyond the buffer */
        if( length > sourceEnd - source )
            break;

        if( !isLegalUTF8( source, length ) )
            break;

        source += length;
    }

    if( end != NULL )
        *end = (const char*) source;

    /* the loop only stops short of sourceEnd when it hit a bad sequence */
    return source >= sourceEnd;
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (

View file

@ -5,6 +5,8 @@
#error only libtransmission should #include this header.
#endif
#include <unistd.h> /* ssize_t */
/*
* Copyright 2001-2004 Unicode, Inc.
*
@ -149,6 +151,11 @@ ConversionResult ConvertUTF32toUTF16 (
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
/* intended to work the same as g_utf8_validate */
Boolean tr_utf8_validate( const char * str, ssize_t max_len, const char ** end );
#ifdef __cplusplus
}
#endif

View file

@ -124,9 +124,9 @@ tr_metainfoMigrate( tr_session * session,
***/
static int
getfile( char ** setme,
const char * root,
tr_benc * path )
getfile( char ** setme,
const char * root,
tr_benc * path )
{
int err;
@ -356,6 +356,7 @@ tr_metainfoParseImpl( const tr_session * session,
const uint8_t * raw;
tr_benc * beInfo;
tr_benc * meta = (tr_benc *) meta_in;
tr_bool err;
/* info_hash: urlencoded 20-byte SHA1 hash of the value of the info key
* from the Metainfo file. Note that the value will be a bencoded
@ -378,21 +379,21 @@ tr_metainfoParseImpl( const tr_session * session,
if( !str || !*str )
return "name";
tr_free( inf->name );
inf->name = tr_strdup( str );
inf->name = tr_utf8clean( str, -1, &err );
/* comment */
if( !tr_bencDictFindStr( meta, "comment.utf-8", &str ) )
if( !tr_bencDictFindStr( meta, "comment", &str ) )
str = "";
tr_free( inf->comment );
inf->comment = tr_strdup( str );
inf->comment = tr_utf8clean( str, -1, &err );
/* created by */
if( !tr_bencDictFindStr( meta, "created by.utf-8", &str ) )
if( !tr_bencDictFindStr( meta, "created by", &str ) )
str = "";
tr_free( inf->creator );
inf->creator = tr_strdup( str );
inf->creator = tr_utf8clean( str, -1, &err );
/* creation date */
if( !tr_bencDictFindInt( meta, "creation date", &i ) )

View file

@ -1,6 +1,8 @@
#include <stdio.h> /* fprintf */
#include <string.h> /* strcmp */
#include "transmission.h"
#include <unistd.h> /* ssize_t */
#include "ConvertUTF.h" /* tr_utf8_validate*/
#include "platform.h"
#include "utils.h"
#include "crypto.h"
@ -135,6 +137,48 @@ test_buildpath( void )
return 0;
}
/*
 * Exercises tr_utf8clean(): clean input must come back unchanged with no
 * error flagged, and input containing bytes that are not valid UTF-8 must
 * come back as a string that passes tr_utf8_validate() with the error flag set.
 *
 * The non-ASCII fixtures are spelled with hex escapes so the bytes under test
 * do not depend on the encoding the source file happens to be saved in.
 */
static int
test_utf8( void )
{
    const char * in;
    char * out;
    tr_bool err;

    /* plain ASCII, length taken from the terminating NUL */
    in = "hello world";
    out = tr_utf8clean( in, -1, &err );
    check( err == FALSE )
    check( out != NULL )
    check( !strcmp( out, in ) )
    tr_free( out );

    /* plain ASCII, explicitly truncated to the first five bytes */
    in = "hello world";
    out = tr_utf8clean( in, 5, &err );
    check( err == FALSE )
    check( out != NULL )
    check( !strcmp( out, "hello" ) )
    tr_free( out );

    /* this version is not utf-8: 17 single-byte characters, 15 of them >= 0x80 */
    in = "\xd2\xf0\xf3\xe4\xed\xee \xe1\xfb\xf2\xfc \xc1\xee\xe3\xee\xec";
    out = tr_utf8clean( in, 17, &err );
    check( out != NULL )
    check( err != 0 )
    check( strlen( out ) == 17 )
    check( tr_utf8_validate( out, -1, NULL ) )
    tr_free( out );

    /* same string, but utf-8 clean: each high byte above encoded as U+00xx */
    in = "\xc3\x92\xc3\xb0\xc3\xb3\xc3\xa4\xc3\xad\xc3\xae "
         "\xc3\xa1\xc3\xbb\xc3\xb2\xc3\xbc "
         "\xc3\x81\xc3\xae\xc3\xa3\xc3\xae\xc3\xac";
    out = tr_utf8clean( in, -1, &err );
    check( out != NULL )
    check( !err )
    check( tr_utf8_validate( out, -1, NULL ) )
    check( !strcmp( in, out ) )
    tr_free( out );

    return 0;
}
int
main( void )
{
@ -160,6 +204,8 @@ main( void )
return i;
if( ( i = test_buildpath( ) ) )
return i;
if( ( i = test_utf8( ) ) )
return i;
/* test that tr_cryptoRandInt() stays in-bounds */
for( i = 0; i < 100000; ++i )

View file

@ -32,6 +32,7 @@
#endif
#include "transmission.h"
#include "ConvertUTF.h"
#include "list.h"
#include "utils.h"
#include "platform.h"
@ -1319,3 +1320,42 @@ tr_lowerBound( const void * key,
return first;
}
/***
****
***/
/*
 * Builds a NUL-terminated copy of `str' in which every byte that
 * tr_utf8_validate() stops on is replaced by a single '?'.
 *
 * str:     input bytes
 * max_len: number of bytes to copy, or negative to use strlen( str )
 * err:     optional; set to FALSE on entry and to TRUE if any byte
 *          had to be replaced
 *
 * Returns the copy as produced by tr_memdup().
 */
char*
tr_utf8clean( const char * str, ssize_t max_len, tr_bool * err )
{
    const char nul = '\0';
    const char * bad;
    char * cleaned;
    struct evbuffer * out = evbuffer_new( );

    if( err != NULL )
        *err = FALSE;

    if( max_len < 0 )
        max_len = (ssize_t) strlen( str );

    for( ;; )
    {
        ssize_t valid_len;

        if( tr_utf8_validate( str, max_len, &bad ) )
            break;

        /* keep the valid prefix, then stand in a '?' for the offending byte */
        valid_len = bad - str;
        evbuffer_add( out, str, valid_len );
        evbuffer_add( out, "?", 1 );

        /* resume just past the byte that was replaced */
        str += valid_len + 1;
        max_len -= valid_len + 1;

        if( err != NULL )
            *err = TRUE;
    }

    /* whatever remains validated cleanly; append it and the terminator */
    evbuffer_add( out, str, max_len );
    evbuffer_add( out, &nul, 1 );

    cleaned = tr_memdup( EVBUFFER_DATA( out ), EVBUFFER_LENGTH( out ) );
    assert( tr_utf8_validate( cleaned, -1, NULL ) );

    evbuffer_free( out );
    return cleaned;
}

View file

@ -237,6 +237,11 @@ uint64_t tr_date( void );
/* wait the specified number of milliseconds */
void tr_wait( uint64_t delay_milliseconds );
/* Returns a NUL-terminated copy of the first max_len bytes of str (all of
 * str, per strlen(), when max_len is negative) with each byte that fails
 * UTF-8 validation replaced by '?'. If err is non-NULL it is set to TRUE
 * when a replacement was made and FALSE otherwise. The result is a separate
 * buffer; the unit test in this change releases it with tr_free(). */
char* tr_utf8clean( const char * str,
ssize_t max_len,
tr_bool * err );
/***
****
***/