1
0
Fork 0
mirror of https://github.com/morpheus65535/bazarr synced 2024-12-26 09:37:25 +00:00
bazarr/libs/playhouse/reflection.py
2022-11-07 13:08:27 -05:00

852 lines
30 KiB
Python

try:
from collections import OrderedDict
except ImportError:
OrderedDict = dict
from collections import namedtuple
from inspect import isclass
import re
import warnings
from peewee import *
from peewee import _StringField
from peewee import _query_val_transform
from peewee import CommaNodeList
from peewee import SCOPE_VALUES
from peewee import make_snake_case
from peewee import text_type
try:
from pymysql.constants import FIELD_TYPE
except ImportError:
try:
from MySQLdb.constants import FIELD_TYPE
except ImportError:
FIELD_TYPE = None
try:
from playhouse import postgres_ext
except ImportError:
postgres_ext = None
try:
from playhouse.cockroachdb import CockroachDatabase
except ImportError:
CockroachDatabase = None
# Python keywords that must not be used verbatim as generated identifiers.
# The list still carries the Python 2 statements ``exec`` and ``print``.
RESERVED_WORDS = set((
    'and as assert break class continue def del elif '
    'else except exec finally for from global if '
    'import in is lambda not or pass print raise '
    'return try while with yield').split())
class UnknownField(object):
    """Marker class used when a column type has no known field mapping."""
class Column(object):
    """
    Store metadata about a database column.

    Besides the values passed to the constructor, a column carries
    foreign-key metadata (``foreign_key``, ``rel_model``, ``related_name``
    and ``to_field``) which stays ``None`` until :py:meth:`set_foreign_key`
    is called.
    """
    primary_key_types = (IntegerField, AutoField)

    def __init__(self, name, field_class, raw_column_type, nullable,
                 primary_key=False, column_name=None, index=False,
                 unique=False, default=None, extra_parameters=None):
        """
        :param name: attribute name to use for the generated field.
        :param field_class: field class chosen for the column.
        :param raw_column_type: column type as reported by the database.
        :param nullable: whether the column accepts NULL.
        :param primary_key: whether the column is (part of) the primary key.
        :param column_name: name of the column in the database.
        :param index: whether the column has a single-column index.
        :param unique: whether that index is unique.
        :param default: default expression for the column, or ``None``.
        :param extra_parameters: optional dict of additional field
            parameters merged into :py:meth:`get_field_parameters`.
        """
        self.name = name
        self.field_class = field_class
        self.raw_column_type = raw_column_type
        self.nullable = nullable
        self.primary_key = primary_key
        self.column_name = column_name
        self.index = index
        self.unique = unique
        self.default = default
        self.extra_parameters = extra_parameters

        # Foreign key metadata. ``foreign_key`` is initialised here as well so
        # that the attribute exists on every column, not only on those for
        # which set_foreign_key() has been called.
        self.foreign_key = None
        self.rel_model = None
        self.related_name = None
        self.to_field = None

    def __repr__(self):
        attrs = [
            'field_class',
            'raw_column_type',
            'nullable',
            'primary_key',
            'column_name']
        keyword_args = ', '.join(
            '%s=%s' % (attr, getattr(self, attr))
            for attr in attrs)
        return 'Column(%s, %s)' % (self.name, keyword_args)

    def get_field_parameters(self):
        """
        Build the keyword arguments for the generated field definition.

        Values are either objects (``True``, a model name) or strings that
        already hold source-code fragments such as ``"'col'"``; callers
        render them with ``%s``.

        :returns: dict mapping parameter name to value.
        """
        params = {}
        if self.extra_parameters is not None:
            params.update(self.extra_parameters)

        # Set up default attributes.
        if self.nullable:
            params['null'] = True
        if self.field_class is ForeignKeyField or self.name != self.column_name:
            params['column_name'] = "'%s'" % self.column_name
        if self.primary_key and not issubclass(self.field_class, AutoField):
            params['primary_key'] = True
        if self.default is not None:
            params['constraints'] = '[SQL("DEFAULT %s")]' % self.default

        # Handle ForeignKeyField-specific attributes.
        if self.is_foreign_key():
            params['model'] = self.rel_model
            if self.to_field:
                params['field'] = "'%s'" % self.to_field
            if self.related_name:
                params['backref'] = "'%s'" % self.related_name

        # Handle indexes on column. Primary keys never get an explicit
        # unique/index flag, and foreign keys never get a plain index flag.
        if not self.is_primary_key():
            if self.unique:
                params['unique'] = 'True'
            elif self.index and not self.is_foreign_key():
                params['index'] = 'True'

        return params

    def is_primary_key(self):
        """Return a truthy value if the column is a primary key."""
        return self.field_class is AutoField or self.primary_key

    def is_foreign_key(self):
        """Return True if the column has been converted to a foreign key."""
        return self.field_class is ForeignKeyField

    def is_self_referential_fk(self):
        """Return True if the column is a foreign key to its own table."""
        return (self.field_class is ForeignKeyField and
                self.rel_model == "'self'")

    def set_foreign_key(self, foreign_key, model_names, dest=None,
                        related_name=None):
        """
        Turn this column into a foreign key.

        :param foreign_key: foreign-key metadata exposing ``table`` and
            ``dest_table``.
        :param model_names: mapping of table name to model name.
        :param dest: destination :py:class:`Column`, if known.
        :param related_name: backref name to use, if any.
        :raises KeyError: if the destination table is missing from
            ``model_names``.
        """
        self.foreign_key = foreign_key
        self.field_class = ForeignKeyField
        if foreign_key.dest_table == foreign_key.table:
            self.rel_model = "'self'"
        else:
            self.rel_model = model_names[foreign_key.dest_table]
        # An empty destination name is normalised to None.
        self.to_field = (dest.name or None) if dest else None
        self.related_name = related_name or None

    def get_field(self):
        """Return the field definition for this column as source code."""
        # Generate the field definition for this column.
        field_params = {}
        for key, value in self.get_field_parameters().items():
            # Field classes are rendered by name rather than by repr.
            if isclass(value) and issubclass(value, Field):
                value = value.__name__
            field_params[key] = value

        param_str = ', '.join('%s=%s' % (k, v)
                              for k, v in sorted(field_params.items()))
        field = '%s = %s(%s)' % (
            self.name,
            self.field_class.__name__,
            param_str)

        # Keep the raw database type visible for unmapped columns.
        if self.field_class is UnknownField:
            field = '%s # %s' % (field, self.raw_column_type)

        return field
class Metadata(object):
    """
    Base class for backend-specific schema introspection helpers.

    Subclasses supply ``column_map`` and implement
    :py:meth:`get_column_types`; everything else delegates to the wrapped
    database object.
    """
    # Maps a backend-specific type identifier to a field class.
    column_map = {}
    # Import statement associated with ``requires_extension``.
    extension_import = ''

    def __init__(self, database):
        self.database = database
        self.requires_extension = False

    def execute(self, sql, *params):
        """Run ``sql`` with ``params`` through the wrapped database."""
        return self.database.execute_sql(sql, params)

    def get_columns(self, table, schema=None):
        """
        Describe the columns of ``table``.

        :returns: ``OrderedDict`` mapping column name to :py:class:`Column`,
            in the order reported by the database.
        """
        described = OrderedDict()
        for info in self.database.get_columns(table, schema):
            described[info.name] = info

        # Look up the actual column type for each column.
        field_types, extras = self.get_column_types(table, schema)

        # A single integer primary key is promoted to its auto-incrementing
        # counterpart.
        primary = self.get_primary_keys(table, schema)
        if len(primary) == 1:
            sole_pk = primary[0]
            pk_type = field_types[sole_pk]
            if pk_type is IntegerField:
                field_types[sole_pk] = AutoField
            elif pk_type is BigIntegerField:
                field_types[sole_pk] = BigAutoField

        result = OrderedDict()
        for col_name, info in described.items():
            col_type = field_types[col_name]
            cleaned = self._clean_default(col_type, info.default)
            result[col_name] = Column(
                col_name,
                field_class=col_type,
                raw_column_type=info.data_type,
                nullable=info.null,
                primary_key=info.primary_key,
                column_name=col_name,
                default=cleaned,
                extra_parameters=extras.get(col_name))

        return result

    def get_column_types(self, table, schema=None):
        """Return ``(column_types, extra_params)``; backend-specific."""
        raise NotImplementedError

    def _clean_default(self, field_class, default):
        """
        Normalise a column default reported by the database.

        :returns: ``None`` when there is no usable default, otherwise the
            default expression (string defaults are single-quoted).
        """
        if default is None:
            return None
        if field_class in (AutoField, BigAutoField):
            return None
        if default.lower() == 'null':
            return None

        needs_quotes = (
            issubclass(field_class, _StringField) and
            isinstance(default, text_type) and
            not default.startswith("'"))
        if needs_quotes:
            default = "'%s'" % default
        # An empty default is rendered as an empty SQL string literal.
        return default or "''"

    def get_foreign_keys(self, table, schema=None):
        """Return the foreign keys reported by the database for ``table``."""
        return self.database.get_foreign_keys(table, schema)

    def get_primary_keys(self, table, schema=None):
        """Return the primary keys reported by the database for ``table``."""
        return self.database.get_primary_keys(table, schema)

    def get_indexes(self, table, schema=None):
        """Return the indexes reported by the database for ``table``."""
        return self.database.get_indexes(table, schema)
class PostgresqlMetadata(Metadata):
    """
    Introspection helper for PostgreSQL.

    ``column_map`` and ``array_types`` are keyed by type OID. When
    ``playhouse.postgres_ext`` is importable, additional OIDs are looked up
    in ``pg_type`` at construction time.
    """
    column_map = {
        16: BooleanField,
        17: BlobField,
        20: BigIntegerField,
        21: SmallIntegerField,
        23: IntegerField,
        25: TextField,
        700: FloatField,
        701: DoubleField,
        1042: CharField,  # blank-padded CHAR
        1043: CharField,
        1082: DateField,
        1114: DateTimeField,
        1184: DateTimeField,
        1083: TimeField,
        1266: TimeField,
        1700: DecimalField,
        2950: UUIDField,  # UUID
    }
    # Array type OID -> field class of the array's elements.
    array_types = {
        1000: BooleanField,
        1001: BlobField,
        1005: SmallIntegerField,
        1007: IntegerField,
        1009: TextField,
        1014: CharField,
        1015: CharField,
        1016: BigIntegerField,
        1115: DateTimeField,
        1182: DateField,
        1183: TimeField,
        2951: UUIDField,
    }
    extension_import = 'from playhouse.postgres_ext import *'

    def __init__(self, database):
        super(PostgresqlMetadata, self).__init__(database)

        # Work on a per-instance copy: the OIDs discovered below come from
        # the connected database and must not leak into the class-level map
        # shared by every other instance.
        self.column_map = dict(self.column_map)

        if postgres_ext is not None:
            # Attempt to add types like HStore and JSON.
            cursor = self.execute('select oid, typname, format_type(oid, NULL)'
                                  ' from pg_type;')
            results = cursor.fetchall()

            for oid, typname, formatted_type in results:
                if typname == 'json':
                    self.column_map[oid] = postgres_ext.JSONField
                elif typname == 'jsonb':
                    self.column_map[oid] = postgres_ext.BinaryJSONField
                elif typname == 'hstore':
                    self.column_map[oid] = postgres_ext.HStoreField
                elif typname == 'tsvector':
                    self.column_map[oid] = postgres_ext.TSVectorField

            for oid in self.array_types:
                self.column_map[oid] = postgres_ext.ArrayField

    def get_column_types(self, table, schema=None):
        """
        Map each column of ``table`` to a field class.

        :param table: table name.
        :param schema: schema name; defaults to ``'public'`` like the other
            methods of this class.
        :returns: ``(column_types, extra_params)`` where ``extra_params``
            carries the element ``field_class`` for array columns.
        """
        schema = schema or 'public'
        column_types = {}
        extra_params = {}
        extension_types = set((
            postgres_ext.ArrayField,
            postgres_ext.BinaryJSONField,
            postgres_ext.JSONField,
            postgres_ext.TSVectorField,
            postgres_ext.HStoreField)) if postgres_ext is not None else set()

        # Look up the actual column type for each column.
        identifier = '%s."%s"' % (schema, table)
        cursor = self.execute(
            'SELECT attname, atttypid FROM pg_catalog.pg_attribute '
            'WHERE attrelid = %s::regclass AND attnum > %s', identifier, 0)

        # Store column metadata in dictionary keyed by column name.
        for name, oid in cursor.fetchall():
            column_types[name] = self.column_map.get(oid, UnknownField)
            if column_types[name] in extension_types:
                self.requires_extension = True
            if oid in self.array_types:
                extra_params[name] = {'field_class': self.array_types[oid]}

        return column_types, extra_params

    def get_columns(self, table, schema=None):
        schema = schema or 'public'
        return super(PostgresqlMetadata, self).get_columns(table, schema)

    def get_foreign_keys(self, table, schema=None):
        schema = schema or 'public'
        return super(PostgresqlMetadata, self).get_foreign_keys(table, schema)

    def get_primary_keys(self, table, schema=None):
        schema = schema or 'public'
        return super(PostgresqlMetadata, self).get_primary_keys(table, schema)

    def get_indexes(self, table, schema=None):
        schema = schema or 'public'
        return super(PostgresqlMetadata, self).get_indexes(table, schema)
class CockroachDBMetadata(PostgresqlMetadata):
    """Introspection helper for CockroachDB, derived from the Postgres one."""
    # CRDB treats INT the same as BIGINT, so we just map bigint type OIDs to
    # regular IntegerField.
    column_map = PostgresqlMetadata.column_map.copy()
    column_map[20] = IntegerField
    array_types = PostgresqlMetadata.array_types.copy()
    array_types[1016] = IntegerField
    extension_import = 'from playhouse.cockroachdb import *'

    def __init__(self, database):
        # Deliberately bypasses PostgresqlMetadata.__init__: only the jsonb
        # and array types are registered below.
        Metadata.__init__(self, database)
        self.requires_extension = True

        # Work on a per-instance copy so OIDs discovered for this database
        # are not written into the class-level map shared by all instances.
        self.column_map = dict(self.column_map)

        if postgres_ext is not None:
            # Attempt to add JSON types.
            cursor = self.execute('select oid, typname, format_type(oid, NULL)'
                                  ' from pg_type;')
            results = cursor.fetchall()

            for oid, typname, formatted_type in results:
                if typname == 'jsonb':
                    self.column_map[oid] = postgres_ext.BinaryJSONField

            for oid in self.array_types:
                self.column_map[oid] = postgres_ext.ArrayField
class MySQLMetadata(Metadata):
    """
    Introspection helper for MySQL.

    ``column_map`` is keyed by the driver's ``FIELD_TYPE`` codes and is empty
    when no MySQL driver could be imported.
    """
    if FIELD_TYPE is not None:
        column_map = {
            FIELD_TYPE.BLOB: TextField,
            FIELD_TYPE.CHAR: CharField,
            FIELD_TYPE.DATE: DateField,
            FIELD_TYPE.DATETIME: DateTimeField,
            FIELD_TYPE.DECIMAL: DecimalField,
            FIELD_TYPE.DOUBLE: FloatField,
            FIELD_TYPE.FLOAT: FloatField,
            FIELD_TYPE.INT24: IntegerField,
            FIELD_TYPE.LONG_BLOB: TextField,
            FIELD_TYPE.LONG: IntegerField,
            FIELD_TYPE.LONGLONG: BigIntegerField,
            FIELD_TYPE.MEDIUM_BLOB: TextField,
            FIELD_TYPE.NEWDECIMAL: DecimalField,
            FIELD_TYPE.SHORT: IntegerField,
            FIELD_TYPE.STRING: CharField,
            FIELD_TYPE.TIMESTAMP: DateTimeField,
            FIELD_TYPE.TIME: TimeField,
            FIELD_TYPE.TINY_BLOB: TextField,
            FIELD_TYPE.TINY: IntegerField,
            FIELD_TYPE.VAR_STRING: CharField,
        }
    else:
        column_map = {}

    def __init__(self, database, **kwargs):
        # NOTE(review): Metadata.__init__ accepts only ``database``, so any
        # keyword argument forwarded here raises TypeError -- confirm whether
        # the kwargs handling is still needed.
        try:
            kwargs['passwd'] = kwargs.pop('password')
        except KeyError:
            pass
        super(MySQLMetadata, self).__init__(database, **kwargs)

    def get_column_types(self, table, schema=None):
        """
        Map each column of ``table`` to a field class.

        The types are read from the cursor description of a one-row SELECT;
        ``schema`` is not used.

        :returns: ``(column_types, {})``.
        """
        # Look up the actual column type for each column.
        cursor = self.execute('SELECT * FROM `%s` LIMIT 1' % table)

        # Store column metadata in dictionary keyed by column name.
        column_types = {}
        for description in cursor.description:
            col_name, type_code = description[:2]
            column_types[col_name] = self.column_map.get(
                type_code, UnknownField)

        return column_types, {}
class SqliteMetadata(Metadata):
    """
    Introspection helper for SQLite.

    ``column_map`` is keyed by the lower-cased declared column type.
    """
    column_map = {
        'bigint': BigIntegerField,
        'blob': BlobField,
        'bool': BooleanField,
        'boolean': BooleanField,
        'char': CharField,
        'date': DateField,
        'datetime': DateTimeField,
        'decimal': DecimalField,
        'float': FloatField,
        'integer': IntegerField,
        'integer unsigned': IntegerField,
        'int': IntegerField,
        'long': BigIntegerField,
        'numeric': DecimalField,
        'real': FloatField,
        'smallinteger': IntegerField,
        'smallint': IntegerField,
        'smallint unsigned': IntegerField,
        'text': TextField,
        'time': TimeField,
        'varchar': CharField,
    }

    # The patterns below are raw strings so that the regex escapes are not
    # treated as (invalid) string escape sequences; their values are the same
    # as the equivalent non-raw literals.
    begin = r'(?:["\[\(]+)?'
    end = r'(?:["\]\)]+)?'
    re_foreign_key = (
        r'(?:FOREIGN KEY\s*)?'
        r'{begin}(.+?){end}\s+(?:.+\s+)?'
        r'references\s+{begin}(.+?){end}'
        r'\s*\(["|\[]?(.+?)["|\]]?\)').format(begin=begin, end=end)
    re_varchar = r'^\s*(?:var)?char\s*\(\s*(\d+)\s*\)\s*$'

    def _map_col(self, column_type):
        """
        Return the field class for a declared SQLite column type.

        Lookup order: exact (lower-cased) match, ``char(n)``/``varchar(n)``,
        then the type with any parenthesised suffix removed. An empty type
        maps to ``BareField``; anything else unmatched maps to
        ``UnknownField``.
        """
        raw_column_type = column_type.lower()
        if raw_column_type in self.column_map:
            field_class = self.column_map[raw_column_type]
        elif re.search(self.re_varchar, raw_column_type):
            field_class = CharField
        else:
            column_type = re.sub(r'\(.+\)', '', raw_column_type)
            if column_type == '':
                field_class = BareField
            else:
                field_class = self.column_map.get(column_type, UnknownField)
        return field_class

    def get_column_types(self, table, schema=None):
        """
        Map each column of ``table`` to a field class.

        :param table: table name.
        :param schema: schema name, forwarded to the database so the same
            table is inspected as in :py:meth:`Metadata.get_columns`.
        :returns: ``(column_types, {})``.
        """
        column_types = {}
        columns = self.database.get_columns(table, schema)

        for column in columns:
            column_types[column.name] = self._map_col(column.data_type)

        return column_types, {}
# Record type holding the five mappings produced by an introspection pass.
_DatabaseMetadata = namedtuple(
    '_DatabaseMetadata',
    'columns primary_keys foreign_keys model_names indexes')
class DatabaseMetadata(_DatabaseMetadata):
    """Introspection result with helpers for reading index information."""

    def multi_column_indexes(self, table):
        """
        List the indexes of ``table`` that span more than one column.

        :returns: list of ``(field_names, unique)`` tuples; columns missing
            from the table's column mapping are left out of ``field_names``.
        """
        return [
            ([self.columns[table][col].name
              for col in index.columns
              if col in self.columns[table]],
             index.unique)
            for index in self.indexes[table]
            if len(index.columns) > 1]

    def column_indexes(self, table):
        """
        Describe the single-column indexes of ``table``.

        :returns: dict mapping column name to the index's ``unique`` flag.
        """
        return {
            index.columns[0]: index.unique
            for index in self.indexes[table]
            if len(index.columns) == 1}
class Introspector(object):
    """
    Read a database schema through a :py:class:`Metadata` helper and turn it
    into column metadata or model classes.
    """
    pk_classes = [AutoField, IntegerField]

    def __init__(self, metadata, schema=None):
        self.metadata = metadata
        self.schema = schema

    def __repr__(self):
        return '<Introspector: %s>' % self.metadata.database

    @classmethod
    def from_database(cls, database, schema=None):
        """
        Create an introspector suited to the type of ``database``.

        :param database: database instance, or an initialized ``Proxy``.
        :param schema: optional schema name.
        :raises ValueError: for an uninitialized ``Proxy`` or an unsupported
            database type.
        """
        if isinstance(database, Proxy):
            if database.obj is None:
                raise ValueError('Cannot introspect an uninitialized Proxy.')
            database = database.obj  # Reference the proxied db obj.
        # CockroachDatabase is checked before PostgresqlDatabase.
        if CockroachDatabase and isinstance(database, CockroachDatabase):
            metadata = CockroachDBMetadata(database)
        elif isinstance(database, PostgresqlDatabase):
            metadata = PostgresqlMetadata(database)
        elif isinstance(database, MySQLDatabase):
            metadata = MySQLMetadata(database)
        elif isinstance(database, SqliteDatabase):
            metadata = SqliteMetadata(database)
        else:
            raise ValueError('Introspection not supported for %r' % database)
        return cls(metadata, schema=schema)

    def get_database_class(self):
        return type(self.metadata.database)

    def get_database_name(self):
        return self.metadata.database.database

    def get_database_kwargs(self):
        return self.metadata.database.connect_params

    def get_additional_imports(self):
        """Return the extension import line (newline-prefixed) if needed."""
        if self.metadata.requires_extension:
            return '\n' + self.metadata.extension_import
        return ''

    def make_model_name(self, table, snake_case=True):
        """
        Derive a model class name from a table name.

        Non-word characters are dropped, underscore-separated parts are
        title-cased and joined, and a ``T`` prefix is added when the result
        does not start with a letter (including when it is empty).
        """
        if snake_case:
            table = make_snake_case(table)
        model = re.sub(r'[^\w]+', '', table)
        model_name = ''.join(sub.title() for sub in model.split('_'))
        # Guard against an empty name (e.g. a table named "_"), which would
        # otherwise raise IndexError on the [0] lookup.
        if not model_name or not model_name[0].isalpha():
            model_name = 'T' + model_name
        return model_name

    def make_column_name(self, column, is_foreign_key=False, snake_case=True):
        """Derive a Python attribute name from a column name."""
        column = column.strip()
        if snake_case:
            column = make_snake_case(column)
        column = column.lower()
        if is_foreign_key:
            # Strip "_id" from foreign keys, unless the foreign-key happens to
            # be named "_id", in which case the name is retained.
            column = re.sub('_id$', '', column) or column

        # Remove characters that are invalid for Python identifiers.
        column = re.sub(r'[^\w]+', '_', column)
        if column in RESERVED_WORDS:
            column += '_'
        if len(column) and column[0].isdigit():
            column = '_' + column
        return column

    def introspect(self, table_names=None, literal_column_names=False,
                   include_views=False, snake_case=True):
        """
        Collect column, key and index metadata for the database's tables.

        :param table_names: optional collection restricting which tables are
            introspected; tables referenced by their foreign keys are added.
        :param literal_column_names: keep column names as-is apart from
            replacing non-word characters with underscores.
        :param include_views: also introspect views.
        :param snake_case: convert table and column names to snake case when
            deriving model and attribute names.
        :returns: :py:class:`DatabaseMetadata`.
        """
        # Retrieve all the tables in the database.
        tables = self.metadata.database.get_tables(schema=self.schema)
        if include_views:
            views = self.metadata.database.get_views(schema=self.schema)
            tables.extend([view.name for view in views])

        if table_names is not None:
            tables = [table for table in tables if table in table_names]
        table_set = set(tables)

        # Store a mapping of table name -> dictionary of columns.
        columns = {}

        # Store a mapping of table name -> set of primary key columns.
        primary_keys = {}

        # Store a mapping of table -> foreign keys.
        foreign_keys = {}

        # Store a mapping of table name -> model name.
        model_names = {}

        # Store a mapping of table name -> indexes.
        indexes = {}

        # Gather the columns for each table. ``tables`` may grow while it is
        # being iterated (see below); the loop picks up the appended entries.
        for table in tables:
            table_indexes = self.metadata.get_indexes(table, self.schema)
            table_columns = self.metadata.get_columns(table, self.schema)
            try:
                foreign_keys[table] = self.metadata.get_foreign_keys(
                    table, self.schema)
            except ValueError as exc:
                # Report the problem and carry on without foreign keys for
                # this table.
                warnings.warn('Unable to read foreign keys for table '
                              '%s: %s' % (table, exc))
                foreign_keys[table] = []
            else:
                # If there is a possibility we could exclude a dependent table,
                # ensure that we introspect it so FKs will work.
                if table_names is not None:
                    for foreign_key in foreign_keys[table]:
                        if foreign_key.dest_table not in table_set:
                            tables.append(foreign_key.dest_table)
                            table_set.add(foreign_key.dest_table)

            model_names[table] = self.make_model_name(table, snake_case)

            # Collect sets of all the column names as well as all the
            # foreign-key column names.
            lower_col_names = set(column_name.lower()
                                  for column_name in table_columns)
            fks = set(fk_col.column for fk_col in foreign_keys[table])

            for col_name, column in table_columns.items():
                if literal_column_names:
                    new_name = re.sub(r'[^\w]+', '_', col_name)
                else:
                    new_name = self.make_column_name(col_name, col_name in fks,
                                                     snake_case)

                # If we have two columns, "parent" and "parent_id", ensure
                # that we don't introduce naming conflicts.
                lower_name = col_name.lower()
                if lower_name.endswith('_id') and new_name in lower_col_names:
                    new_name = col_name.lower()

                column.name = new_name

            for index in table_indexes:
                if len(index.columns) == 1:
                    column = index.columns[0]
                    if column in table_columns:
                        table_columns[column].unique = index.unique
                        table_columns[column].index = True

            primary_keys[table] = self.metadata.get_primary_keys(
                table, self.schema)
            columns[table] = table_columns
            indexes[table] = table_indexes

        # Gather all instances where we might have a `related_name` conflict,
        # either due to multiple FKs on a table pointing to the same table,
        # or a related_name that would conflict with an existing field.
        related_names = {}
        sort_fn = lambda foreign_key: foreign_key.column
        for table in tables:
            models_referenced = set()
            for foreign_key in sorted(foreign_keys[table], key=sort_fn):
                try:
                    column = columns[table][foreign_key.column]
                except KeyError:
                    continue

                dest_table = foreign_key.dest_table
                if dest_table in models_referenced:
                    related_names[column] = '%s_%s_set' % (
                        dest_table,
                        column.name)
                else:
                    models_referenced.add(dest_table)

        # On the second pass convert all foreign keys.
        for table in tables:
            for foreign_key in foreign_keys[table]:
                src = columns[foreign_key.table][foreign_key.column]
                try:
                    dest = columns[foreign_key.dest_table][
                        foreign_key.dest_column]
                except KeyError:
                    dest = None

                src.set_foreign_key(
                    foreign_key=foreign_key,
                    model_names=model_names,
                    dest=dest,
                    related_name=related_names.get(src))

        return DatabaseMetadata(
            columns,
            primary_keys,
            foreign_keys,
            model_names,
            indexes)

    def generate_models(self, skip_invalid=False, table_names=None,
                        literal_column_names=False, bare_fields=False,
                        include_views=False):
        """
        Build model classes for the introspected tables.

        :param skip_invalid: swallow ``ValueError`` raised while creating a
            model class instead of propagating it.
        :param table_names: passed to :py:meth:`introspect`.
        :param literal_column_names: passed to :py:meth:`introspect`.
        :param bare_fields: use ``BareField`` for every non-foreign-key
            column.
        :param include_views: passed to :py:meth:`introspect`.
        :returns: dict mapping table name to model class.
        """
        database = self.introspect(table_names, literal_column_names,
                                   include_views)
        models = {}

        class BaseModel(Model):
            class Meta:
                database = self.metadata.database
                schema = self.schema

        # Tables whose model is currently being built; used to detect
        # reference cycles during the recursive descent below.
        pending = set()

        def _create_model(table, models):
            pending.add(table)
            # Create the models this table depends on first.
            for foreign_key in database.foreign_keys[table]:
                dest = foreign_key.dest_table

                if dest not in models and dest != table:
                    if dest in pending:
                        warnings.warn('Possible reference cycle found between '
                                      '%s and %s' % (table, dest))
                    else:
                        _create_model(dest, models)

            # NOTE(review): this list holds field names (column.name) but is
            # compared against column keys further down -- confirm the two
            # agree for renamed columns.
            primary_keys = []
            columns = database.columns[table]
            for column_name, column in columns.items():
                if column.primary_key:
                    primary_keys.append(column.name)

            multi_column_indexes = database.multi_column_indexes(table)
            column_indexes = database.column_indexes(table)

            class Meta:
                indexes = multi_column_indexes
                table_name = table

            # Fix models with multi-column primary keys.
            composite_key = False
            if len(primary_keys) == 0:
                primary_keys = columns.keys()

            if len(primary_keys) > 1:
                Meta.primary_key = CompositeKey(*[
                    field.name for col, field in columns.items()
                    if col in primary_keys])
                composite_key = True

            attrs = {'Meta': Meta}
            for column_name, column in columns.items():
                FieldClass = column.field_class
                if FieldClass is not ForeignKeyField and bare_fields:
                    FieldClass = BareField
                elif FieldClass is UnknownField:
                    FieldClass = BareField

                params = {
                    'column_name': column_name,
                    'null': column.nullable}
                if column.primary_key and composite_key:
                    # Members of a composite key are plain fields.
                    if FieldClass is AutoField:
                        FieldClass = IntegerField
                    params['primary_key'] = False
                elif column.primary_key and FieldClass is not AutoField:
                    params['primary_key'] = True
                if column.is_foreign_key():
                    if column.is_self_referential_fk():
                        params['model'] = 'self'
                    else:
                        dest_table = column.foreign_key.dest_table
                        if dest_table in models:
                            params['model'] = models[dest_table]
                        else:
                            # Target model does not exist (yet): defer.
                            FieldClass = DeferredForeignKey
                            params['rel_model_name'] = dest_table
                    if column.to_field:
                        params['field'] = column.to_field

                    # Generate a unique related name.
                    params['backref'] = '%s_%s_rel' % (table, column_name)

                if column.default is not None:
                    constraint = SQL('DEFAULT %s' % column.default)
                    params['constraints'] = [constraint]

                if column_name in column_indexes and not \
                   column.is_primary_key():
                    if column_indexes[column_name]:
                        params['unique'] = True
                    elif not column.is_foreign_key():
                        params['index'] = True

                attrs[column.name] = FieldClass(**params)

            try:
                models[table] = type(str(table), (BaseModel,), attrs)
            except ValueError:
                if not skip_invalid:
                    raise
            finally:
                if table in pending:
                    pending.remove(table)

        # Actually generate Model classes.
        for table, model in sorted(database.model_names.items()):
            if table not in models:
                _create_model(table, models)

        return models
def introspect(database, schema=None):
    """Introspect ``database`` and return the resulting metadata."""
    return Introspector.from_database(database, schema=schema).introspect()
def generate_models(database, schema=None, **options):
    """
    Generate model classes for ``database``.

    ``options`` are forwarded to :py:meth:`Introspector.generate_models`.
    """
    inspector = Introspector.from_database(database, schema=schema)
    models = inspector.generate_models(**options)
    return models
def print_model(model, indexes=True, inline_indexes=False):
    """
    Print a summary of ``model``: its name, one line per field and,
    optionally, its indexes.

    :param model: model class to describe.
    :param indexes: also print an ``index(es)`` section.
    :param inline_indexes: flag UNIQUE / INDEX on the field lines.
    """
    meta = model._meta
    print(meta.name)

    for field in meta.sorted_fields:
        line = [' %s %s' % (field.name, field.field_type)]
        if field.primary_key:
            line.append(' PK')
        elif inline_indexes and field.unique:
            line.append(' UNIQUE')
        elif inline_indexes and field.index:
            line.append(' INDEX')
        if isinstance(field, ForeignKeyField):
            target = (field.rel_model.__name__, field.rel_field.name)
            line.append(' FK: %s.%s' % target)
        print(''.join(line))

    if not indexes:
        return

    index_list = meta.fields_to_index()
    if not index_list:
        return

    print('\nindex(es)')
    for index in index_list:
        ctx = model._meta.database.get_sql_context()
        # Render the index expressions with %s placeholders so the
        # parameters can be interpolated below.
        with ctx.scope_values(param='%s', quote='""'):
            ctx.sql(CommaNodeList(index._expressions))
            if index._where:
                ctx.literal(' WHERE ')
                ctx.sql(index._where)
            sql, params = ctx.query()

        rendered = sql % tuple(map(_query_val_transform, params))
        line = [' ', rendered.replace('"', '')]
        if index._unique:
            line.append(' UNIQUE')
        print(''.join(line))
def get_table_sql(model):
    """
    Return the CREATE TABLE statement for ``model`` with one column
    definition per line and the query parameters interpolated.

    If the statement does not have the expected ``... ( ... ) ...`` shape it
    is returned without the line-splitting applied.
    """
    sql, params = model._schema._create_table().query()
    # Normalise the placeholder so the parameters can be interpolated with
    # the % operator at the end.
    if model._meta.database.param != '%s':
        sql = sql.replace(model._meta.database.param, '%s')

    # Format and indent the table declaration, simplest possible approach.
    # Raw string: "\(" is a regex escape, not a string escape.
    match_obj = re.match(r'^(.+?\()(.+)(\).*)', sql)
    if match_obj is None:
        clean = sql.strip()
    else:
        create, columns, extra = match_obj.groups()
        indented = ',\n'.join(' %s' % column
                              for column in columns.split(', '))
        clean = '\n'.join((create, indented, extra)).strip()
    return clean % tuple(map(_query_val_transform, params))
def print_table_sql(model):
    """Print the formatted CREATE TABLE statement for ``model``."""
    statement = get_table_sql(model)
    print(statement)