File size: 23,351 Bytes
2a0bc63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import namedtuple
import json
import logging
import os
import warnings
from itertools import product

from cassandra import metadata
from cassandra.cqlengine import CQLEngineException
from cassandra.cqlengine import columns, query
from cassandra.cqlengine.connection import execute, get_cluster, format_log_context
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.named import NamedTable
from cassandra.cqlengine.usertype import UserType

CQLENG_ALLOW_SCHEMA_MANAGEMENT = 'CQLENG_ALLOW_SCHEMA_MANAGEMENT'

Field = namedtuple('Field', ['name', 'type'])

log = logging.getLogger(__name__)

# system keyspaces
schema_columnfamilies = NamedTable('system', 'schema_columnfamilies')


def _get_context(keyspaces, connections):
    """Return all the execution contexts"""

    if keyspaces:
        if not isinstance(keyspaces, (list, tuple)):
            raise ValueError('keyspaces must be a list or a tuple.')

    if connections:
        if not isinstance(connections, (list, tuple)):
            raise ValueError('connections must be a list or a tuple.')

    keyspaces = keyspaces if keyspaces else [None]
    connections = connections if connections else [None]

    return product(connections, keyspaces)


def create_keyspace_simple(name, replication_factor, durable_writes=True, connections=None):
    """

    Creates a keyspace with SimpleStrategy for replica placement



    If the keyspace already exists, it will not be modified.



    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*



    :param str name: name of keyspace to create

    :param int replication_factor: keyspace replication factor, used with :attr:`~.SimpleStrategy`

    :param bool durable_writes: Write log is bypassed if set to False

    :param list connections: List of connection names

    """
    _create_keyspace(name, durable_writes, 'SimpleStrategy',
                     {'replication_factor': replication_factor}, connections=connections)


def create_keyspace_network_topology(name, dc_replication_map, durable_writes=True, connections=None):
    """

    Creates a keyspace with NetworkTopologyStrategy for replica placement



    If the keyspace already exists, it will not be modified.



    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*



    :param str name: name of keyspace to create

    :param dict dc_replication_map: map of dc_names: replication_factor

    :param bool durable_writes: Write log is bypassed if set to False

    :param list connections: List of connection names

    """
    _create_keyspace(name, durable_writes, 'NetworkTopologyStrategy', dc_replication_map, connections=connections)


def _create_keyspace(name, durable_writes, strategy_class, strategy_options, connections=None):
    if not _allow_schema_modification():
        return

    if connections:
        if not isinstance(connections, (list, tuple)):
            raise ValueError('Connections must be a list or a tuple.')

    def __create_keyspace(name, durable_writes, strategy_class, strategy_options, connection=None):
        cluster = get_cluster(connection)

        if name not in cluster.metadata.keyspaces:
            log.info(format_log_context("Creating keyspace %s", connection=connection), name)
            ks_meta = metadata.KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)
            execute(ks_meta.as_cql_query(), connection=connection)
        else:
            log.info(format_log_context("Not creating keyspace %s because it already exists", connection=connection), name)

    if connections:
        for connection in connections:
            __create_keyspace(name, durable_writes, strategy_class, strategy_options, connection=connection)
    else:
        __create_keyspace(name, durable_writes, strategy_class, strategy_options)


def drop_keyspace(name, connections=None):
    """

    Drops a keyspace, if it exists.



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*



    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    :param str name: name of keyspace to drop

    :param list connections: List of connection names

    """
    if not _allow_schema_modification():
        return

    if connections:
        if not isinstance(connections, (list, tuple)):
            raise ValueError('Connections must be a list or a tuple.')

    def _drop_keyspace(name, connection=None):
        cluster = get_cluster(connection)
        if name in cluster.metadata.keyspaces:
            execute("DROP KEYSPACE {0}".format(metadata.protect_name(name)), connection=connection)

    if connections:
        for connection in connections:
            _drop_keyspace(name, connection)
    else:
        _drop_keyspace(name)

def _get_index_name_by_column(table, column_name):
    """

    Find the index name for a given table and column.

    """
    protected_name = metadata.protect_name(column_name)
    possible_index_values = [protected_name, "values(%s)" % protected_name]
    for index_metadata in table.indexes.values():
        options = dict(index_metadata.index_options)
        if options.get('target') in possible_index_values:
            return index_metadata.name


def sync_table(model, keyspaces=None, connections=None):
    """

    Inspects the model and creates / updates the corresponding table and columns.



    If `keyspaces` is specified, the table will be synched for all specified keyspaces.

    Note that the `Model.__keyspace__` is ignored in that case.



    If `connections` is specified, the table will be synched for all specified connections. Note that the `Model.__connection__` is ignored in that case.

    If not specified, it will try to get the connection from the Model.



    Any User Defined Types used in the table are implicitly synchronized.



    This function can only add fields that are not part of the primary key.



    Note that the attributes removed from the model are not deleted on the database.

    They become effectively ignored by (will not show up on) the model.



    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    """

    context = _get_context(keyspaces, connections)
    for connection, keyspace in context:
        with query.ContextQuery(model, keyspace=keyspace) as m:
            _sync_table(m, connection=connection)


def _sync_table(model, connection=None):
    if not _allow_schema_modification():
        return

    if not issubclass(model, Model):
        raise CQLEngineException("Models must be derived from base Model.")

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    cf_name = model.column_family_name()
    raw_cf_name = model._raw_column_family_name()

    ks_name = model._get_keyspace()
    connection = connection or model._get_connection()

    cluster = get_cluster(connection)

    try:
        keyspace = cluster.metadata.keyspaces[ks_name]
    except KeyError:
        msg = format_log_context("Keyspace '{0}' for model {1} does not exist.", connection=connection)
        raise CQLEngineException(msg.format(ks_name, model))

    tables = keyspace.tables

    syncd_types = set()
    for col in model._columns.values():
        udts = []
        columns.resolve_udts(col, udts)
        for udt in [u for u in udts if u not in syncd_types]:
            _sync_type(ks_name, udt, syncd_types, connection=connection)

    if raw_cf_name not in tables:
        log.debug(format_log_context("sync_table creating new table %s", keyspace=ks_name, connection=connection), cf_name)
        qs = _get_create_table(model)

        try:
            execute(qs, connection=connection)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in str(ex):
                raise
    else:
        log.debug(format_log_context("sync_table checking existing table %s", keyspace=ks_name, connection=connection), cf_name)
        table_meta = tables[raw_cf_name]

        _validate_pk(model, table_meta)

        table_columns = table_meta.columns
        model_fields = set()

        for model_name, col in model._columns.items():
            db_name = col.db_field_name
            model_fields.add(db_name)
            if db_name in table_columns:
                col_meta = table_columns[db_name]
                if col_meta.cql_type != col.db_type:
                    msg = format_log_context('Existing table {0} has column "{1}" with a type ({2}) differing from the model type ({3}).'
                                  ' Model should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(cf_name, db_name, col_meta.cql_type, col.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

                continue

            if col.primary_key or col.primary_key:
                msg = format_log_context("Cannot add primary key '{0}' (with db_field '{1}') to existing table {2}", keyspace=ks_name, connection=connection)
                raise CQLEngineException(msg.format(model_name, db_name, cf_name))

            query = "ALTER TABLE {0} add {1}".format(cf_name, col.get_column_def())
            execute(query, connection=connection)

        db_fields_not_in_model = model_fields.symmetric_difference(table_columns)
        if db_fields_not_in_model:
            msg = format_log_context("Table {0} has fields not referenced by model: {1}", keyspace=ks_name, connection=connection)
            log.info(msg.format(cf_name, db_fields_not_in_model))

        _update_options(model, connection=connection)

    table = cluster.metadata.keyspaces[ks_name].tables[raw_cf_name]

    indexes = [c for n, c in model._columns.items() if c.index]

    # TODO: support multiple indexes in C* 3.0+
    for column in indexes:
        index_name = _get_index_name_by_column(table, column.db_field_name)
        if index_name:
            continue

        qs = ['CREATE INDEX']
        qs += ['ON {0}'.format(cf_name)]
        qs += ['("{0}")'.format(column.db_field_name)]
        qs = ' '.join(qs)
        execute(qs, connection=connection)


def _validate_pk(model, table_meta):
    model_partition = [c.db_field_name for c in model._partition_keys.values()]
    meta_partition = [c.name for c in table_meta.partition_key]
    model_clustering = [c.db_field_name for c in model._clustering_keys.values()]
    meta_clustering = [c.name for c in table_meta.clustering_key]

    if model_partition != meta_partition or model_clustering != meta_clustering:
        def _pk_string(partition, clustering):
            return "PRIMARY KEY (({0}){1})".format(', '.join(partition), ', ' + ', '.join(clustering) if clustering else '')
        raise CQLEngineException("Model {0} PRIMARY KEY composition does not match existing table {1}. "
                                 "Model: {2}; Table: {3}. "
                                 "Update model or drop the table.".format(model, model.column_family_name(),
                                                                          _pk_string(model_partition, model_clustering),
                                                                          _pk_string(meta_partition, meta_clustering)))


def sync_type(ks_name, type_model, connection=None):
    """

    Inspects the type_model and creates / updates the corresponding type.



    Note that the attributes removed from the type_model are not deleted on the database (this operation is not supported).

    They become effectively ignored by (will not show up on) the type_model.



    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    """
    if not _allow_schema_modification():
        return

    if not issubclass(type_model, UserType):
        raise CQLEngineException("Types must be derived from base UserType.")

    _sync_type(ks_name, type_model, connection=connection)


def _sync_type(ks_name, type_model, omit_subtypes=None, connection=None):

    syncd_sub_types = omit_subtypes or set()
    for field in type_model._fields.values():
        udts = []
        columns.resolve_udts(field, udts)
        for udt in [u for u in udts if u not in syncd_sub_types]:
            _sync_type(ks_name, udt, syncd_sub_types, connection=connection)
            syncd_sub_types.add(udt)

    type_name = type_model.type_name()
    type_name_qualified = "%s.%s" % (ks_name, type_name)

    cluster = get_cluster(connection)

    keyspace = cluster.metadata.keyspaces[ks_name]
    defined_types = keyspace.user_types

    if type_name not in defined_types:
        log.debug(format_log_context("sync_type creating new type %s", keyspace=ks_name, connection=connection), type_name_qualified)
        cql = get_create_type(type_model, ks_name)
        execute(cql, connection=connection)
        cluster.refresh_user_type_metadata(ks_name, type_name)
        type_model.register_for_keyspace(ks_name, connection=connection)
    else:
        type_meta = defined_types[type_name]
        defined_fields = type_meta.field_names
        model_fields = set()
        for field in type_model._fields.values():
            model_fields.add(field.db_field_name)
            if field.db_field_name not in defined_fields:
                execute("ALTER TYPE {0} ADD {1}".format(type_name_qualified, field.get_column_def()), connection=connection)
            else:
                field_type = type_meta.field_types[defined_fields.index(field.db_field_name)]
                if field_type != field.db_type:
                    msg = format_log_context('Existing user type {0} has field "{1}" with a type ({2}) differing from the model user type ({3}).'
                                  ' UserType should be updated.', keyspace=ks_name, connection=connection)
                    msg = msg.format(type_name_qualified, field.db_field_name, field_type, field.db_type)
                    warnings.warn(msg)
                    log.warning(msg)

        type_model.register_for_keyspace(ks_name, connection=connection)

        if len(defined_fields) == len(model_fields):
            log.info(format_log_context("Type %s did not require synchronization", keyspace=ks_name, connection=connection), type_name_qualified)
            return

        db_fields_not_in_model = model_fields.symmetric_difference(defined_fields)
        if db_fields_not_in_model:
            msg = format_log_context("Type %s has fields not referenced by model: %s", keyspace=ks_name, connection=connection)
            log.info(msg, type_name_qualified, db_fields_not_in_model)


def get_create_type(type_model, keyspace):
    type_meta = metadata.UserType(keyspace,
                                  type_model.type_name(),
                                  (f.db_field_name for f in type_model._fields.values()),
                                  (v.db_type for v in type_model._fields.values()))
    return type_meta.as_cql_query()


def _get_create_table(model):
    ks_table_name = model.column_family_name()
    query_strings = ['CREATE TABLE {0}'.format(ks_table_name)]

    # add column types
    pkeys = []  # primary keys
    ckeys = []  # clustering keys
    qtypes = []  # field types

    def add_column(col):
        s = col.get_column_def()
        if col.primary_key:
            keys = (pkeys if col.partition_key else ckeys)
            keys.append('"{0}"'.format(col.db_field_name))
        qtypes.append(s)

    for name, col in model._columns.items():
        add_column(col)

    qtypes.append('PRIMARY KEY (({0}){1})'.format(', '.join(pkeys), ckeys and ', ' + ', '.join(ckeys) or ''))

    query_strings += ['({0})'.format(', '.join(qtypes))]

    property_strings = []

    _order = ['"{0}" {1}'.format(c.db_field_name, c.clustering_order or 'ASC') for c in model._clustering_keys.values()]
    if _order:
        property_strings.append('CLUSTERING ORDER BY ({0})'.format(', '.join(_order)))

    # options strings use the V3 format, which matches CQL more closely and does not require mapping
    property_strings += metadata.TableMetadataV3._make_option_strings(model.__options__ or {})

    if property_strings:
        query_strings += ['WITH {0}'.format(' AND '.join(property_strings))]

    return ' '.join(query_strings)


def _get_table_metadata(model, connection=None):
    # returns the table as provided by the native driver for a given model
    cluster = get_cluster(connection)
    ks = model._get_keyspace()
    table = model._raw_column_family_name()
    table = cluster.metadata.keyspaces[ks].tables[table]
    return table


def _options_map_from_strings(option_strings):
    # converts options strings to a mapping to strings or dict
    options = {}
    for option in option_strings:
        name, value = option.split('=')
        i = value.find('{')
        if i >= 0:
            value = value[i:value.rfind('}') + 1].replace("'", '"')  # from cql single quotes to json double; not aware of any values that would be escaped right now
            value = json.loads(value)
        else:
            value = value.strip()
        options[name.strip()] = value
    return options


def _update_options(model, connection=None):
    """Updates the table options for the given model if necessary.



    :param model: The model to update.

    :param connection: Name of the connection to use



    :return: `True`, if the options were modified in Cassandra,

        `False` otherwise.

    :rtype: bool

    """
    ks_name = model._get_keyspace()
    msg = format_log_context("Checking %s for option differences", keyspace=ks_name, connection=connection)
    log.debug(msg, model)
    model_options = model.__options__ or {}

    table_meta = _get_table_metadata(model, connection=connection)
    # go to CQL string first to normalize meta from different versions
    existing_option_strings = set(table_meta._make_option_strings(table_meta.options))
    existing_options = _options_map_from_strings(existing_option_strings)
    model_option_strings = metadata.TableMetadataV3._make_option_strings(model_options)
    model_options = _options_map_from_strings(model_option_strings)

    update_options = {}
    for name, value in model_options.items():
        try:
            existing_value = existing_options[name]
        except KeyError:
            msg = format_log_context("Invalid table option: '%s'; known options: %s", keyspace=ks_name, connection=connection)
            raise KeyError(msg % (name, existing_options.keys()))
        if isinstance(existing_value, str):
            if value != existing_value:
                update_options[name] = value
        else:
            try:
                for k, v in value.items():
                    if existing_value[k] != v:
                        update_options[name] = value
                        break
            except KeyError:
                update_options[name] = value

    if update_options:
        options = ' AND '.join(metadata.TableMetadataV3._make_option_strings(update_options))
        query = "ALTER TABLE {0} WITH {1}".format(model.column_family_name(), options)
        execute(query, connection=connection)
        return True

    return False


def drop_table(model, keyspaces=None, connections=None):
    """

    Drops the table indicated by the model, if it exists.



    If `keyspaces` is specified, the table will be dropped for all specified keyspaces. Note that the `Model.__keyspace__` is ignored in that case.



    If `connections` is specified, the table will be synched for all specified connections. Note that the `Model.__connection__` is ignored in that case.

    If not specified, it will try to get the connection from the Model.





    **This function should be used with caution, especially in production environments.

    Take care to execute schema modifications in a single context (i.e. not concurrently with other clients).**



    *There are plans to guard schema-modifying functions with an environment-driven conditional.*

    """

    context = _get_context(keyspaces, connections)
    for connection, keyspace in context:
        with query.ContextQuery(model, keyspace=keyspace) as m:
            _drop_table(m, connection=connection)


def _drop_table(model, connection=None):
    if not _allow_schema_modification():
        return

    connection = connection or model._get_connection()

    # don't try to delete non existant tables
    meta = get_cluster(connection).metadata

    ks_name = model._get_keyspace()
    raw_cf_name = model._raw_column_family_name()

    try:
        meta.keyspaces[ks_name].tables[raw_cf_name]
        execute('DROP TABLE {0};'.format(model.column_family_name()), connection=connection)
    except KeyError:
        pass


def _allow_schema_modification():
    if not os.getenv(CQLENG_ALLOW_SCHEMA_MANAGEMENT):
        msg = CQLENG_ALLOW_SCHEMA_MANAGEMENT + " environment variable is not set. Future versions of this package will require this variable to enable management functions."
        warnings.warn(msg)
        log.warning(msg)

    return True