Spaces:
Running
Running
File size: 23,351 Bytes
2a0bc63 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 |
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
import json
import logging
import os
import warnings
from itertools import product
from cassandra import metadata
from cassandra.cqlengine import CQLEngineException
from cassandra.cqlengine import columns, query
from cassandra.cqlengine.connection import execute, get_cluster, format_log_context
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.named import NamedTable
from cassandra.cqlengine.usertype import UserType
# Name of the environment variable read by _allow_schema_modification(),
# which emits a warning when the variable is unset or empty.
CQLENG_ALLOW_SCHEMA_MANAGEMENT = 'CQLENG_ALLOW_SCHEMA_MANAGEMENT'
# Simple (name, type) record; not referenced by the functions visible in this module.
Field = namedtuple('Field', ['name', 'type'])
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# system keyspaces
# Named-table handle for `system.schema_columnfamilies`.
schema_columnfamilies = NamedTable('system', 'schema_columnfamilies')
def _get_context(keyspaces, connections):
"""Return all the execution contexts"""
if keyspaces:
if not isinstance(keyspaces, (list, tuple)):
raise ValueError('keyspaces must be a list or a tuple.')
if connections:
if not isinstance(connections, (list, tuple)):
raise ValueError('connections must be a list or a tuple.')
keyspaces = keyspaces if keyspaces else [None]
connections = connections if connections else [None]
return product(connections, keyspaces)
def create_keyspace_simple(name, replication_factor, durable_writes=True, connections=None):
    """
    Create a keyspace that uses SimpleStrategy for replica placement.

    A keyspace that already exists is left unmodified.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    :param str name: name of keyspace to create
    :param int replication_factor: keyspace replication factor, used with :attr:`~.SimpleStrategy`
    :param bool durable_writes: Write log is bypassed if set to False
    :param list connections: List of connection names
    """
    strategy_options = {'replication_factor': replication_factor}
    _create_keyspace(name, durable_writes, 'SimpleStrategy', strategy_options, connections=connections)
def create_keyspace_network_topology(name, dc_replication_map, durable_writes=True, connections=None):
    """
    Create a keyspace that uses NetworkTopologyStrategy for replica placement.

    A keyspace that already exists is left unmodified.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    :param str name: name of keyspace to create
    :param dict dc_replication_map: map of dc_names: replication_factor
    :param bool durable_writes: Write log is bypassed if set to False
    :param list connections: List of connection names
    """
    _create_keyspace(
        name,
        durable_writes,
        'NetworkTopologyStrategy',
        dc_replication_map,
        connections=connections,
    )
def _create_keyspace(name, durable_writes, strategy_class, strategy_options, connections=None):
    """
    Create keyspace ``name`` on each requested connection unless it exists.

    Does nothing when ``_allow_schema_modification()`` returns a falsy value.
    With no ``connections`` the keyspace is created through the default
    connection (``connection=None``).

    :param str name: name of keyspace to create
    :param bool durable_writes: passed to ``metadata.KeyspaceMetadata``
    :param str strategy_class: replication strategy class name
    :param dict strategy_options: options for the replication strategy
    :param list connections: List of connection names
    :raises ValueError: if ``connections`` is truthy but not a list or tuple
    """
    if not _allow_schema_modification():
        return

    if connections and not isinstance(connections, (list, tuple)):
        raise ValueError('Connections must be a list or a tuple.')

    def create_on(connection):
        # The cluster metadata decides whether the keyspace already exists.
        cluster = get_cluster(connection)
        if name in cluster.metadata.keyspaces:
            log.info(format_log_context("Not creating keyspace %s because it already exists", connection=connection), name)
            return

        log.info(format_log_context("Creating keyspace %s", connection=connection), name)
        ks_meta = metadata.KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)
        execute(ks_meta.as_cql_query(), connection=connection)

    # `None` stands for the default connection.
    for connection in (connections or [None]):
        create_on(connection)
def drop_keyspace(name, connections=None):
    """
    Drop keyspace ``name`` on each requested connection, if it exists there.

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    :param str name: name of keyspace to drop
    :param list connections: List of connection names
    :raises ValueError: if ``connections`` is truthy but not a list or tuple
    """
    if not _allow_schema_modification():
        return

    if connections and not isinstance(connections, (list, tuple)):
        raise ValueError('Connections must be a list or a tuple.')

    def drop_on(connection):
        # Only issue the DROP when the cluster metadata lists the keyspace.
        cluster = get_cluster(connection)
        if name not in cluster.metadata.keyspaces:
            return
        execute("DROP KEYSPACE {0}".format(metadata.protect_name(name)), connection=connection)

    # `None` stands for the default connection.
    for connection in (connections or [None]):
        drop_on(connection)
def _get_index_name_by_column(table, column_name):
    """
    Return the name of the index on ``column_name`` in ``table``.

    An index matches when its ``target`` option equals the protected column
    name, either bare or wrapped as ``values(<name>)``. Returns ``None`` when
    no index in ``table.indexes`` matches.
    """
    quoted = metadata.protect_name(column_name)
    targets = (quoted, "values(%s)" % quoted)
    matching_names = (
        index_meta.name
        for index_meta in table.indexes.values()
        if dict(index_meta.index_options).get('target') in targets
    )
    return next(matching_names, None)
def sync_table(model, keyspaces=None, connections=None):
    """
    Inspects the model and creates / updates the corresponding table and columns.

    If `keyspaces` is specified, the table is synchronized in every listed keyspace
    and `Model.__keyspace__` is ignored.

    If `connections` is specified, the table is synchronized on every listed connection
    and `Model.__connection__` is ignored. Otherwise the connection is taken from the Model.

    Any User Defined Types used in the table are implicitly synchronized.

    This function can only add fields that are not part of the primary key.

    Note that the attributes removed from the model are not deleted on the database.
    They become effectively ignored by (will not show up on) the model.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*
    """
    for connection, keyspace in _get_context(keyspaces, connections):
        # ContextQuery yields the model bound to the requested keyspace.
        with query.ContextQuery(model, keyspace=keyspace) as scoped_model:
            _sync_table(scoped_model, connection=connection)
def _sync_table(model, connection=None):
    """
    Create or update the table backing ``model`` on a single connection.

    Steps, in order:

    1. synchronize every user defined type referenced by the model's columns;
    2. if the keyspace metadata does not list the table, create it; otherwise
       validate the primary key, add the columns missing from the table, log
       table columns the model does not reference and update table options;
    3. create an index for every indexed column that does not have one yet.

    Does nothing when ``_allow_schema_modification()`` returns a falsy value.

    :param model: a concrete (non abstract) :class:`Model` subclass
    :param connection: connection name; falls back to ``model._get_connection()``
    :raises CQLEngineException: if ``model`` is not a Model subclass, is
        abstract, its keyspace is absent from the cluster metadata, its
        primary key does not match the existing table, or a primary key
        column would have to be added to an existing table
    """
    if not _allow_schema_modification():
        return

    if not issubclass(model, Model):
        raise CQLEngineException("Models must be derived from base Model.")

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    cf_name = model.column_family_name()
    raw_cf_name = model._raw_column_family_name()

    ks_name = model._get_keyspace()
    connection = connection or model._get_connection()

    cluster = get_cluster(connection)

    try:
        keyspace = cluster.metadata.keyspaces[ks_name]
    except KeyError:
        msg = format_log_context("Keyspace '{0}' for model {1} does not exist.", connection=connection)
        raise CQLEngineException(msg.format(ks_name, model))

    tables = keyspace.tables

    # Synchronize the UDTs used by the columns. Each synchronized type is
    # recorded so that a type shared by several columns is only handled once.
    syncd_types = set()
    for col in model._columns.values():
        udts = []
        columns.resolve_udts(col, udts)
        for udt in udts:
            if udt in syncd_types:
                continue
            _sync_type(ks_name, udt, syncd_types, connection=connection)
            syncd_types.add(udt)

    if raw_cf_name not in tables:
        log.debug(format_log_context("sync_table creating new table %s", keyspace=ks_name, connection=connection), cf_name)
        qs = _get_create_table(model)

        try:
            execute(qs, connection=connection)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in str(ex):
                raise
    else:
        log.debug(format_log_context("sync_table checking existing table %s", keyspace=ks_name, connection=connection), cf_name)
        table_meta = tables[raw_cf_name]

        _validate_pk(model, table_meta)

        table_columns = table_meta.columns
        model_fields = set()

        for model_name, col in model._columns.items():
            db_name = col.db_field_name
            model_fields.add(db_name)

            if db_name in table_columns:
                # Existing column: only report a type mismatch, never alter it.
                col_meta = table_columns[db_name]
                if col_meta.cql_type != col.db_type:
                    msg = format_log_context('Existing table {0} has column "{1}" with a type ({2}) differing from the model type ({3}).'
                                             ' Model should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(cf_name, db_name, col_meta.cql_type, col.db_type)
                    warnings.warn(msg)
                    log.warning(msg)
                continue

            if col.primary_key:
                msg = format_log_context("Cannot add primary key '{0}' (with db_field '{1}') to existing table {2}", keyspace=ks_name, connection=connection)
                raise CQLEngineException(msg.format(model_name, db_name, cf_name))

            # Named `alter_cql` so the imported `query` module is not shadowed.
            alter_cql = "ALTER TABLE {0} add {1}".format(cf_name, col.get_column_def())
            execute(alter_cql, connection=connection)

        # Only columns that exist in the table but not in the model belong in
        # this message; columns that were just added above must not be listed.
        db_fields_not_in_model = set(table_columns).difference(model_fields)
        if db_fields_not_in_model:
            msg = format_log_context("Table {0} has fields not referenced by model: {1}", keyspace=ks_name, connection=connection)
            log.info(msg.format(cf_name, db_fields_not_in_model))

        _update_options(model, connection=connection)

    # Re-read the table from the cluster metadata before inspecting indexes.
    table = cluster.metadata.keyspaces[ks_name].tables[raw_cf_name]

    indexes = [c for c in model._columns.values() if c.index]

    # TODO: support multiple indexes in C* 3.0+
    for column in indexes:
        if _get_index_name_by_column(table, column.db_field_name):
            continue

        qs = 'CREATE INDEX ON {0} ("{1}")'.format(cf_name, column.db_field_name)
        execute(qs, connection=connection)
def _validate_pk(model, table_meta):
model_partition = [c.db_field_name for c in model._partition_keys.values()]
meta_partition = [c.name for c in table_meta.partition_key]
model_clustering = [c.db_field_name for c in model._clustering_keys.values()]
meta_clustering = [c.name for c in table_meta.clustering_key]
if model_partition != meta_partition or model_clustering != meta_clustering:
def _pk_string(partition, clustering):
return "PRIMARY KEY (({0}){1})".format(', '.join(partition), ', ' + ', '.join(clustering) if clustering else '')
raise CQLEngineException("Model {0} PRIMARY KEY composition does not match existing table {1}. "
"Model: {2}; Table: {3}. "
"Update model or drop the table.".format(model, model.column_family_name(),
_pk_string(model_partition, model_clustering),
_pk_string(meta_partition, meta_clustering)))
def sync_type(ks_name, type_model, connection=None):
    """
    Inspects the type_model and creates / updates the corresponding type.

    Attributes removed from the type_model are not deleted on the database
    (this operation is not supported). They become effectively ignored by
    (will not show up on) the type_model.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    :raises CQLEngineException: if ``type_model`` is not a UserType subclass
    """
    if not _allow_schema_modification():
        return

    is_user_type = issubclass(type_model, UserType)
    if not is_user_type:
        raise CQLEngineException("Types must be derived from base UserType.")

    _sync_type(ks_name, type_model, connection=connection)
def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):
    """
    Create or update the user defined type described by ``type_model``.

    User defined types referenced by the type's own fields are synchronized
    first, recursively. The type is then created if the keyspace metadata
    does not list it. Otherwise, fields missing from the database are added
    with ``ALTER TYPE`` and fields whose type differs from the model are
    reported with a warning. In both cases the type is registered for the
    keyspace through ``type_model.register_for_keyspace``.

    :param str ks_name: keyspace holding the type
    :param type_model: user type class to synchronize
    :param set omit_subtypes: types that were already synchronized; updated
        in place with the sub-types synchronized by this call
    :param connection: connection name passed to ``get_cluster`` / ``execute``
    """
    # Test against None explicitly: `omit_subtypes or set()` would swap a
    # caller-supplied *empty* set for a fresh one, so the caller would never
    # see which sub-types were synchronized here.
    syncd_sub_types = set() if omit_subtypes is None else omit_subtypes

    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in udts:
            # Checked on every iteration so that types synchronized by a
            # nested call (or listed twice) are not processed again.
            if udt in syncd_sub_types:
                continue
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s", keyspace=ks_name, connection=connection), type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
        return

    type_meta = defined_types[type_name]
    defined_fields = type_meta.field_names
    model_fields = set()
    for field in type_model._fields.values():
        model_fields.add(field.db_field_name)
        if field.db_field_name not in defined_fields:
            execute("ALTER TYPE {0} ADD {1}".format(type_name_qualified, field.get_column_def()), connection=connection)
            continue

        # field_types is indexed in parallel with field_names.
        field_type = type_meta.field_types[defined_fields.index(field.db_field_name)]
        if field_type != field.db_type:
            msg = format_log_context('Existing user type {0} has field "{1}" with a type ({2}) differing from the model user type ({3}).'
                                     ' UserType should be updated.', keyspace=ks_name, connection=connection)
            msg = msg.format(type_name_qualified, field.db_field_name, field_type, field.db_type)
            warnings.warn(msg)
            log.warning(msg)

    type_model.register_for_keyspace(ks_name, connection=connection)

    if len(defined_fields) == len(model_fields):
        log.info(format_log_context("Type %s did not require synchronization", keyspace=ks_name, connection=connection), type_name_qualified)
        return

    # Only fields that exist in the database but not in the model belong in
    # this message; fields that were just added above must not be listed.
    db_fields_not_in_model = set(defined_fields).difference(model_fields)
    if db_fields_not_in_model:
        msg = format_log_context("Type %s has fields not referenced by model: %s", keyspace=ks_name, connection=connection)
        log.info(msg, type_name_qualified, db_fields_not_in_model)
def get_create_type(type_model, keyspace):
    """
    Return the CQL produced by ``metadata.UserType.as_cql_query()`` for ``type_model``.

    :param type_model: user type class exposing ``type_name()`` and ``_fields``
    :param keyspace: keyspace name passed to ``metadata.UserType``
    """
    fields = type_model._fields.values()
    field_names = (f.db_field_name for f in fields)
    field_types = (f.db_type for f in fields)
    type_meta = metadata.UserType(keyspace, type_model.type_name(), field_names, field_types)
    return type_meta.as_cql_query()
def _get_create_table(model):
    """
    Build the ``CREATE TABLE`` statement for ``model``.

    The statement lists every column definition followed by the PRIMARY KEY
    clause, then an optional ``WITH`` section made of the clustering order
    and the table options from ``model.__options__``.

    :return: the statement as a single string
    """
    ks_table_name = model.column_family_name()

    partition_cols = []   # quoted partition key column names
    clustering_cols = []  # quoted clustering key column names
    definitions = []      # column definitions, then the PRIMARY KEY clause

    for col in model._columns.values():
        definition = col.get_column_def()
        if col.primary_key:
            key_list = partition_cols if col.partition_key else clustering_cols
            key_list.append('"{0}"'.format(col.db_field_name))
        definitions.append(definition)

    clustering_suffix = ', ' + ', '.join(clustering_cols) if clustering_cols else ''
    definitions.append('PRIMARY KEY (({0}){1})'.format(', '.join(partition_cols), clustering_suffix))

    statement = [
        'CREATE TABLE {0}'.format(ks_table_name),
        '({0})'.format(', '.join(definitions)),
    ]

    properties = []

    ordering = [
        '"{0}" {1}'.format(c.db_field_name, c.clustering_order or 'ASC')
        for c in model._clustering_keys.values()
    ]
    if ordering:
        properties.append('CLUSTERING ORDER BY ({0})'.format(', '.join(ordering)))

    # options strings use the V3 format, which matches CQL more closely and does not require mapping
    properties += metadata.TableMetadataV3._make_option_strings(model.__options__ or {})

    if properties:
        statement.append('WITH {0}'.format(' AND '.join(properties)))

    return ' '.join(statement)
def _get_table_metadata(model, connection=None):
    """
    Return the table metadata held by the native driver for ``model``.

    :raises KeyError: if the keyspace or the table is absent from the cluster metadata
    """
    cluster = get_cluster(connection)
    keyspace_name = model._get_keyspace()
    table_name = model._raw_column_family_name()
    return cluster.metadata.keyspaces[keyspace_name].tables[table_name]
def _options_map_from_strings(option_strings):
# converts options strings to a mapping to strings or dict
options = {}
for option in option_strings:
name, value = option.split('=')
i = value.find('{')
if i >= 0:
value = value[i:value.rfind('}') + 1].replace("'", '"') # from cql single quotes to json double; not aware of any values that would be escaped right now
value = json.loads(value)
else:
value = value.strip()
options[name.strip()] = value
return options
def _update_options(model, connection=None):
    """Bring the table options in Cassandra in line with ``model.__options__``.

    Both the existing and the model options are rendered to CQL option
    strings and parsed back, so that they can be compared in one format.

    :param model: The model to update.
    :param connection: Name of the connection to use

    :return: `True`, if the options were modified in Cassandra,
       `False` otherwise.
    :rtype: bool
    :raises KeyError: if the model sets an option the existing table does not report
    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences", keyspace=ks_name, connection=connection)
    log.debug(msg, model)

    declared_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_options = _options_map_from_strings(set(table_meta._make_option_strings(table_meta.options)))
    wanted_options = _options_map_from_strings(metadata.TableMetadataV3._make_option_strings(declared_options))

    changed = {}
    for name, value in wanted_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context("Invalid table option: '%s'; known options: %s", keyspace=ks_name, connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))

        if isinstance(existing_value, str):
            differs = value != existing_value
        else:
            # Map option: a missing sub-key or a differing sub-value both
            # count as a difference.
            differs = any(k not in existing_value or existing_value[k] != v
                          for k, v in value.items())

        if differs:
            changed[name] = value

    if not changed:
        return False

    options = ' AND '.join(metadata.TableMetadataV3._make_option_strings(changed))
    alter_cql = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(), options)
    execute(alter_cql, connection=connection)
    return True
def drop_table(model, keyspaces=None, connections=None):
    """
    Drops the table indicated by the model, if it exists.

    If `keyspaces` is specified, the table is dropped in every listed keyspace
    and `Model.__keyspace__` is ignored.

    If `connections` is specified, the table is dropped on every listed connection
    and `Model.__connection__` is ignored. Otherwise the connection is taken from the Model.

    **This function should be used with caution, especially in production environments.
    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**

    *There are plans to guard schema-modifying functions with an environment-driven conditional.*
    """
    for connection, keyspace in _get_context(keyspaces, connections):
        # ContextQuery yields the model bound to the requested keyspace.
        with query.ContextQuery(model, keyspace=keyspace) as scoped_model:
            _drop_table(scoped_model, connection=connection)
def _drop_table(model, connection=None):
    """
    Drop the table backing ``model`` on one connection, if the metadata lists it.

    Does nothing when ``_allow_schema_modification()`` returns a falsy value
    or when the keyspace or table is absent from the cluster metadata.

    :param model: model whose table is dropped
    :param connection: connection name; falls back to ``model._get_connection()``
    """
    if not _allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to delete non existent tables
    meta = get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    # Keep the try body limited to the metadata lookup, so that a KeyError
    # raised while executing the DROP is not mistaken for "table missing"
    # and silently discarded.
    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
    except KeyError:
        return

    execute('DROP TABLE {0};'.format(model.column_family_name()), connection=connection)
def _allow_schema_modification():
    """
    Report whether schema management functions may proceed.

    Currently always returns ``True``. When the environment variable named by
    ``CQLENG_ALLOW_SCHEMA_MANAGEMENT`` is unset or empty, a warning is emitted
    through both ``warnings.warn`` and the module logger.
    """
    if os.getenv(CQLENG_ALLOW_SCHEMA_MANAGEMENT):
        return True

    msg = CQLENG_ALLOW_SCHEMA_MANAGEMENT + " environment variable is not set. Future versions of this package will require this variable to enable management functions."
    warnings.warn(msg)
    log.warning(msg)
    return True
|