1
0
Fork 0
mirror of https://github.com/transmission/transmission synced 2025-03-15 16:29:34 +00:00

(2.0x trunk) #3397 "checksum errors when downloading files whose names are encoded in iso-8859-1" -- fixed

This commit is contained in:
Charles Kerr 2010-07-07 16:48:23 +00:00
parent 93bb7d6b3c
commit 91d8cbc383
5 changed files with 76 additions and 45 deletions

View file

@ -104,7 +104,7 @@ fi
AC_HEADER_STDC
AC_HEADER_TIME
AC_CHECK_FUNCS([pread pwrite lrintf strlcpy daemon dirname basename strcasecmp localtime_r fallocate64 posix_fallocate memmem strtold syslog valloc getpagesize posix_memalign clearenv])
AC_CHECK_FUNCS([iconv_open pread pwrite lrintf strlcpy daemon dirname basename strcasecmp localtime_r fallocate64 posix_fallocate memmem strtold syslog valloc getpagesize posix_memalign clearenv])
AC_PROG_INSTALL
AC_PROG_MAKE_SET
ACX_PTHREAD

View file

@ -169,7 +169,7 @@ getfile( char ** setme, const char * root, tr_benc * path )
}
}
*setme = tr_utf8clean( (char*)EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ), NULL );
*setme = tr_utf8clean( (char*)EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
/* fprintf( stderr, "[%s]\n", *setme ); */
evbuffer_free( buf );
success = TRUE;
@ -412,7 +412,6 @@ tr_metainfoParseImpl( const tr_session * session,
tr_benc * d;
tr_benc * infoDict = NULL;
tr_benc * meta = (tr_benc *) meta_in;
tr_bool err;
tr_bool b;
tr_bool isMagnet = FALSE;
@ -471,7 +470,7 @@ tr_metainfoParseImpl( const tr_session * session,
if( !str || !*str )
return "name";
tr_free( inf->name );
inf->name = tr_utf8clean( str, -1, &err );
inf->name = tr_utf8clean( str, -1 );
}
/* comment */
@ -479,14 +478,14 @@ tr_metainfoParseImpl( const tr_session * session,
if( !tr_bencDictFindStr( meta, "comment", &str ) )
str = "";
tr_free( inf->comment );
inf->comment = tr_utf8clean( str, -1, &err );
inf->comment = tr_utf8clean( str, -1 );
/* created by */
if( !tr_bencDictFindStr( meta, "created by.utf-8", &str ) )
if( !tr_bencDictFindStr( meta, "created by", &str ) )
str = "";
tr_free( inf->creator );
inf->creator = tr_utf8clean( str, -1, &err );
inf->creator = tr_utf8clean( str, -1 );
/* creation date */
if( !tr_bencDictFindInt( meta, "creation date", &i ) )

View file

@ -174,36 +174,31 @@ test_utf8( void )
{
const char * in;
char * out;
tr_bool err;
in = "hello world";
out = tr_utf8clean( in, -1, &err );
check( err == FALSE )
out = tr_utf8clean( in, -1 );
check( out != NULL )
check( !strcmp( out, in ) )
tr_free( out );
in = "hello world";
out = tr_utf8clean( in, 5, &err );
check( err == FALSE )
out = tr_utf8clean( in, 5 );
check( out != NULL )
check( !strcmp( out, "hello" ) )
tr_free( out );
/* this version is not utf-8 */
in = "Òðóäíî áûòü Áîãîì";
out = tr_utf8clean( in, 17, &err );
out = tr_utf8clean( in, 17 );
check( out != NULL )
check( err != 0 )
check( strlen( out ) == 17 )
check( tr_utf8_validate( out, -1, NULL ) )
tr_free( out );
/* same string, but utf-8 clean */
in = "Òðóäíî áûòü Ã<>îãîì";
out = tr_utf8clean( in, -1, &err );
out = tr_utf8clean( in, -1 );
check( out != NULL )
check( !err );
check( tr_utf8_validate( out, -1, NULL ) )
check ( !strcmp( in, out ) )
tr_free( out );

View file

@ -16,6 +16,7 @@
#if defined(SYS_DARWIN)
#define HAVE_GETPAGESIZE
#define HAVE_ICONV_OPEN
#define HAVE_VALLOC
#undef HAVE_POSIX_MEMALIGN /* not supported on OS X 10.5 and lower */
#endif
@ -30,6 +31,9 @@
#include <string.h> /* strerror(), memset(), memmem() */
#include <time.h> /* nanosleep() */
#ifdef HAVE_ICONV_OPEN
#include <iconv.h>
#endif
#include <libgen.h> /* basename() */
#include <sys/time.h>
#include <sys/types.h>
@ -1161,8 +1165,68 @@ tr_lowerBound( const void * key,
****
***/
/**
 * Build a NUL-terminated copy of the first `inlen` bytes of `in` in which
 * every byte that tr_utf8_validate() stops on is replaced by a '?'.
 *
 * Each time validation fails, the bytes before the reported stop position
 * are copied verbatim, one byte is skipped, and a single '?' is emitted in
 * its place. Validation then resumes on the remainder.
 *
 * @param in    the bytes to sanitize
 * @param inlen how many bytes of `in` to process
 * @return a block obtained from tr_memdup() holding the sanitized bytes plus
 *         a trailing '\0' (presumably to be released with tr_free() --
 *         confirm against tr_memdup()'s contract)
 */
static char*
strip_non_utf8( const char * in, size_t inlen )
{
    const char * cursor = in;
    size_t remaining = inlen;
    const char * stop;
    char * cleaned;
    struct evbuffer * scratch = evbuffer_new( );

    for( ;; )
    {
        int valid_len;

        if( tr_utf8_validate( cursor, remaining, &stop ) )
            break;

        /* keep everything the validator accepted... */
        valid_len = stop - cursor;
        evbuffer_add( scratch, cursor, valid_len );

        /* ...and stand in a '?' for the one byte being skipped */
        evbuffer_add( scratch, "?", 1 );
        cursor += ( valid_len + 1 );
        remaining -= ( valid_len + 1 );
    }

    /* whatever is left passed validation; append it and terminate */
    evbuffer_add( scratch, cursor, remaining );
    evbuffer_add( scratch, "", 1 );

    cleaned = tr_memdup( EVBUFFER_DATA( scratch ), EVBUFFER_LENGTH( scratch ) );
    evbuffer_free( scratch );
    return cleaned;
}
/**
 * Produce a newly-allocated, NUL-terminated UTF-8 rendition of the first
 * `inlen` bytes of `in`.
 *
 * When HAVE_ICONV_OPEN is defined, each candidate source encoding is tried
 * in order via iconv(); the first conversion that iconv() does not report
 * as failed is used. If every candidate fails, or iconv support is compiled
 * out, the result comes from strip_non_utf8() instead.
 *
 * @param in    the bytes to convert
 * @param inlen how many bytes of `in` to convert
 * @return a heap-allocated string from tr_strndup() or strip_non_utf8()
 *         (presumably to be released with tr_free() -- confirm with callers)
 */
static char*
to_utf8( const char * in, size_t inlen )
{
    char * ret = NULL;

#ifdef HAVE_ICONV_OPEN
    int i;
    const char * encodings[] = { "CURRENT", "ISO-8859-15" };
    const int encoding_count = sizeof( encodings ) / sizeof( encodings[0] );
    const size_t buflen = inlen*4 + 10;
    char * out = tr_new( char, buflen ); /* scratch space shared by all attempts */

    for( i=0; !ret && i<encoding_count; ++i )
    {
        /* iconv() takes a non-const char**, hence the cast */
        char * inbuf = (char*) in;
        char * outbuf = out;
        size_t inbytesleft = inlen;
        size_t outbytesleft = buflen;
        const char * test_encoding = encodings[i];

        iconv_t cd = iconv_open( "UTF-8", test_encoding );
        if( cd != (iconv_t)-1 ) {
            /* buflen-outbytesleft is the number of bytes iconv() wrote */
            if( iconv( cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft ) != (size_t)-1 )
                ret = tr_strndup( out, buflen-outbytesleft );
            iconv_close( cd );
        }
    }

    /* ret (if any) is its own copy, so the scratch buffer must be released
       here; previously it was leaked on every call */
    tr_free( out );
#endif

    if( ret == NULL )
        ret = strip_non_utf8( in, inlen );

    return ret;
}
char*
tr_utf8clean( const char * str, int max_len, tr_bool * err )
tr_utf8clean( const char * str, int max_len )
{
char * ret;
const char * end;
@ -1170,36 +1234,10 @@ tr_utf8clean( const char * str, int max_len, tr_bool * err )
if( max_len < 0 )
max_len = (int) strlen( str );
if( err != NULL )
*err = FALSE;
if( tr_utf8_validate( str, max_len, &end ) )
{
ret = tr_strndup( str, max_len );
}
else
{
const char zero = '\0';
struct evbuffer * buf = evbuffer_new( );
while( !tr_utf8_validate ( str, max_len, &end ) )
{
const int good_len = end - str;
evbuffer_add( buf, str, good_len );
max_len -= ( good_len + 1 );
str += ( good_len + 1 );
evbuffer_add( buf, "?", 1 );
if( err != NULL )
*err = TRUE;
}
evbuffer_add( buf, str, max_len );
evbuffer_add( buf, &zero, 1 );
ret = tr_memdup( EVBUFFER_DATA( buf ), EVBUFFER_LENGTH( buf ) );
evbuffer_free( buf );
}
ret = to_utf8( str, max_len );
assert( tr_utf8_validate( ret, -1, NULL ) );
return ret;

View file

@ -275,9 +275,8 @@ void tr_wait_msec( long int delay_milliseconds );
* @return a newly-allocated string that must be freed with tr_free()
* @param str the string to make a clean copy of
* @param len the length of the string to copy. If -1, the entire string is used.
* @param err if an error occurs and err is non-NULL, it's set to TRUE.
*/
char* tr_utf8clean( const char * str, int len, tr_bool * err ) TR_GNUC_MALLOC;
char* tr_utf8clean( const char * str, int len ) TR_GNUC_MALLOC;
/***