##############################################################################
#
# Copyright (c) 2009 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
Database schema installers
"""
from __future__ import absolute_import
from __future__ import print_function

from zope.interface import implementer

from ..connmanager import connection_callback
from ..interfaces import ISchemaInstaller
from ..schema import AbstractSchemaInstaller

logger = __import__('logging').getLogger(__name__)

class _StoredFunction(object):
    """
    Represents a function stored in the database.

    It is equal to another instance if it has the same
    name and same checksum (signature and creation statement
    are ignored).
    """

    # The function's name
    name = None

    # The function's signature: (boolean, bytea, bytea)
    signature = None

    # The function's CREATE statement
    create = None

    # The checksum for the create statement,
    # as stored in the object's comment.
    checksum = None

    def __init__(self, name, signature, checksum, create=None):
        self.name = name
        self.signature = signature
        self.checksum = checksum
        self.create = create

    def __str__(self):
        """
        Return ``name(signature)``
        """
        if self.signature is None:
            # We didn't read this from the database at all.
            # Don't pretend we know what it is.
            return self.name
        # Signature can be the empty string if it takes no params.
        return "%s(%s)" % (self.name, self.signature)

    def __repr__(self):
        return "<%s %s>" % (
            self, self.checksum
        )

    def __eq__(self, other):
        if not isinstance(other, _StoredFunction):
            return NotImplemented
        return self.name == other.name and self.checksum == other.checksum

    def __hash__(self):
        return hash((self.name, self.checksum))
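
# A minimal illustration of the equality semantics above, with hypothetical
# values: two instances with the same name and checksum compare equal even
# when the one read from disk does not yet know its signature.
#
#   >>> f_disk = _StoredFunction('lock_objects', None, 'abc123')
#   >>> f_db = _StoredFunction('lock_objects', 'BIGINT[], BIGINT[]', 'abc123')
#   >>> f_disk == f_db
#   True
#   >>> f_disk == _StoredFunction('lock_objects', None, 'ffffff')
#   False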


@implementer(ISchemaInstaller)
class PostgreSQLSchemaInstaller(AbstractSchemaInstaller):

    database_type = 'postgresql'

    # Caching of proc files.
    _PROCEDURES = {}

    def __init__(self, options, connmanager, runner, locker):
        self.options = options
        super().__init__(
            connmanager, runner, options.keep_history)
        self.locker = locker

    def _read_proc_files(self):
        procs = super()._read_proc_files()
        # Convert from bare strings into _StoredFunction objects
        # (which are missing their signatures at this point).
        current_object = 'current_object' if self.keep_history else 'object_state'
        if self.keep_history:
            object_state_join = """
                INNER JOIN object_state ON (object_state.zoid = cur.zoid
                                            AND object_state.tid = cur.tid)
            """
            object_state_name = 'object_state'
        else:
            object_state_join = ""
            object_state_name = 'cur'

        return {
            name: _StoredFunction(
                name,
                None,
                self._checksum_for_str(value),
                value.format(
                    CURRENT_OBJECT=current_object,
                    OBJECT_STATE_JOIN=object_state_join,
                    OBJECT_STATE_NAME=object_state_name
                )
            )
            for name, value in procs.items()
        }

    def get_database_name(self, cursor):
        cursor.execute("SELECT current_database()")
        row, = cursor.fetchall()
        name, = row
        return self._metadata_to_native_str(name)

    @connection_callback(inherit=AbstractSchemaInstaller._prepare_with_connection)
    def _prepare_with_connection(self, conn, cursor):
        super()._prepare_with_connection(conn, cursor)
        # Do we need to merge blob chunks?
        if not self.options.shared_blob_dir:
            cursor.execute('SELECT chunk_num FROM blob_chunk WHERE chunk_num > 0 LIMIT 1')
            if cursor.fetchone():
                logger.info("Merging blob chunks on the server.")
                cursor.execute("SELECT merge_blob_chunks()")
                # If we've done our job right, any blobs cached on
                # disk are still perfectly valid.
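
    # To illustrate ``_read_proc_files``: each procedure source on disk is a
    # template whose placeholders are filled in according to ``keep_history``.
    # A hypothetical sketch (not a real proc file):
    #
    #   src = "SELECT state FROM {CURRENT_OBJECT} cur {OBJECT_STATE_JOIN}"
    #   # keep_history=False: CURRENT_OBJECT becomes 'object_state' and the
    #   # join is empty, so the query reads straight from object_state.
    #   # keep_history=True: CURRENT_OBJECT becomes 'current_object' and an
    #   # INNER JOIN against object_state is spliced in.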

    def create_procedures(self, cursor):
        if not self.__all_procedures_installed(cursor):
            self.__install_procedures(cursor)
            if not self.__all_procedures_installed(cursor):
                raise AssertionError(
                    "Could not get version information after "
                    "installing the stored procedures.")

    def create_triggers(self, cursor):
        triggers = self.list_triggers(cursor)
        __traceback_info__ = triggers, self.list_tables(cursor), self.get_database_name(cursor)
        if 'blob_chunk_delete' not in triggers:
            self.__install_triggers(cursor)

    def __native_names_only(self, cursor):
        native = self._metadata_to_native_str
        return frozenset([
            native(name)
            for (name,) in cursor.fetchall()
        ])

    def list_tables(self, cursor):
        cursor.execute("SELECT tablename FROM pg_tables")
        return self.__native_names_only(cursor)

    def list_sequences(self, cursor):
        cursor.execute("SELECT relname FROM pg_class WHERE relkind = 'S'")
        return self.__native_names_only(cursor)

    def list_views(self, cursor):
        cursor.execute("SELECT relname FROM pg_class WHERE relkind = 'v'")
        return self.__native_names_only(cursor)

    def list_languages(self, cursor):
        cursor.execute("SELECT lanname FROM pg_catalog.pg_language")
        return self.__native_names_only(cursor)

    def __install_languages(self, cursor):
        if 'plpgsql' not in self.list_languages(cursor):
            cursor.execute("CREATE LANGUAGE plpgsql")

    def list_procedures(self, cursor):
        """
        Returns {procedure name: _StoredFunction}.
        """
        # The description is populated with
        # ``COMMENT ON FUNCTION <name>(<args>) IS 'comment'``.
        # Prior to Postgres 10, the args are required; with
        # 10 and later they are only needed if the function is overloaded.
        stmt = """
        SELECT p.proname AS funcname,
               d.description,
               pg_catalog.pg_get_function_identity_arguments(p.oid)
        FROM pg_proc p
        INNER JOIN pg_namespace n ON n.oid = p.pronamespace
        LEFT JOIN pg_description AS d ON (d.objoid = p.oid)
        WHERE n.nspname = 'public'
        """
        cursor.execute(stmt)
        res = {}
        native = self._metadata_to_native_str
        for (name, checksum, signature) in cursor.fetchall():
            name = native(name)
            checksum = native(checksum)
            signature = native(signature)
            disk_func = self.procedures.get(name)
            res[name.lower()] = db_func = _StoredFunction(
                name,
                signature,
                checksum,
                # Include the source, if it's around.
                disk_func.create if disk_func else None
            )
            if disk_func and disk_func.checksum == db_func.checksum:
                # Yay, what's in the database matches what's read from disk.
                # That means the signature must match too.
                disk_func.signature = db_func.signature
            # But of course we don't add a checksum to the db_func
            # if we don't already have one; we wouldn't be able to tell
            # a mismatch.
        return res
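
    # The checksum round trip, sketched with hypothetical values: after a
    # procedure is (re)installed, its checksum is stored as a comment, e.g.
    #
    #   COMMENT ON FUNCTION lock_objects(BIGINT[], BIGINT[]) IS 'abc123'
    #
    # ``list_procedures`` reads that comment back out of ``pg_description``,
    # so a procedure file whose checksum changed on disk shows up as a
    # mismatch and gets reinstalled.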

    def __all_procedures_installed(self, cursor):
        """
        Check whether all required stored procedures are installed.

        Returns True only if all required procedures are installed
        and up to date.
        """
        expected = self.procedures
        installed = self.list_procedures(cursor)
        # If the database evolves over time, there could be
        # extra procs still there that we don't care about.
        installed = {k: v for k, v in installed.items() if k in expected}
        if installed != expected:
            logger.info(
                "Procedures incorrect, will reinstall. "
                "Expected: %s. "
                "Actual: %s",
                expected, installed
            )
            return False
        return True

    def __install_procedures(self, cursor):
        """Install the stored procedures."""
        self.__install_languages(cursor)
        # PostgreSQL procedures in the SQL language
        # do lots of validation at compile time; in particular,
        # they check that the functions they use in SELECT statements
        # actually exist. When we have procedures that call each other,
        # that means there's an order they have to be created in.
        # Rather than try to figure out what that order is, or encode it
        # in names somehow, we'll disable that validation for the duration
        # of this transaction (just like pg_dump).
        cursor.execute('SET LOCAL check_function_bodies = off')
        for proc_name, stored_func in self.procedures.items():
            __traceback_info__ = proc_name, self.keep_history
            proc_source = stored_func.create
            # All definitions should be written with 'CREATE OR REPLACE'
            # so we don't need to bother with 'DROP'. Though, if the return
            # type changes, we can't REPLACE.
            cursor.execute(proc_source)

        # Update checksums.
        # Postgres < 10 requires the signature to identify the function;
        # after that it's optional if the function isn't overloaded.
        # Rather than try to parse the signature from the file ourselves, we
        # let the database do it and then ask it.
        for db_proc in self.list_procedures(cursor).values():
            # db_proc will have the signature but perhaps not the checksum.
            try:
                disk_proc = self.procedures[db_proc.name]
            except KeyError: # pragma: no cover
                # One in the DB no longer on disk. Ignore.
                continue
            disk_proc.signature = db_proc.signature
            if db_proc.checksum == disk_proc.checksum:
                continue
            db_proc.checksum = disk_proc.checksum
            # For pg8000 we can't use a parameter here (because it prepares?)
            comment = "COMMENT ON FUNCTION %s IS '%s'" % (
                str(db_proc), db_proc.checksum
            )
            __traceback_info__ = comment
            cursor.execute(comment)

    def list_triggers(self, cursor):
        cursor.execute("SELECT tgname FROM pg_trigger")
        return self.__native_names_only(cursor)

    def __install_triggers(self, cursor):
        stmt = """
        CREATE TRIGGER blob_chunk_delete
            BEFORE DELETE ON blob_chunk
            FOR EACH ROW
            EXECUTE PROCEDURE blob_chunk_delete_trigger()
        """
        cursor.execute(stmt)
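
    # Why ``SET LOCAL check_function_bodies = off`` (above, in
    # ``__install_procedures``) matters, with a hypothetical pair of
    # functions: this succeeds even though ``g`` does not exist yet when
    # ``f`` is created:
    #
    #   CREATE FUNCTION f() RETURNS int AS 'SELECT g()' LANGUAGE SQL;
    #   CREATE FUNCTION g() RETURNS int AS 'SELECT 1' LANGUAGE SQL;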

    def drop_all(self):
        def delete_blob_chunk(_conn, cursor):
            if 'blob_chunk' in self.list_tables(cursor):
                # Trigger deletion of blob OIDs.
                cursor.execute("DELETE FROM blob_chunk")
        self.connmanager.open_and_call(delete_blob_chunk)
        super().drop_all()

    def _create_pack_lock(self, cursor):
        return

    def _create_new_oid(self, cursor):
        stmt = """
        CREATE SEQUENCE IF NOT EXISTS zoid_seq;
        """
        self.runner.run_script(cursor, stmt)

    CREATE_PACK_OBJECT_IX_TMPL = """
    CREATE INDEX pack_object_keep_false
        ON pack_object (zoid)
        WHERE keep = false;
    CREATE INDEX pack_object_keep_true
        ON pack_object (visited)
        WHERE keep = true;
    """

    DROP_TABLE_TMPL = 'DROP TABLE IF EXISTS {table}'

    def _reset_oid(self, cursor):
        stmt = "ALTER SEQUENCE zoid_seq RESTART WITH 1;"
        self.runner.run_script(cursor, stmt)

    # Use the fast, semi-transactional way to truncate tables. It's
    # not MVCC safe, but "TRUNCATE is transaction-safe with respect to
    # the data in the tables: the truncation will be safely rolled
    # back if the surrounding transaction does not commit."
    _zap_all_tbl_stmt = 'TRUNCATE TABLE %s CASCADE'

    def _before_zap_all_tables(self, conn, cursor, tables, slow=False):
        super()._before_zap_all_tables(conn, cursor, tables, slow)
        if not slow and 'blob_chunk' in tables:
            # If we're going to be truncating, it's important to
            # remove the large objects through lo_unlink. We have a
            # trigger that does that, but only for DELETE.
            # The `vacuumlo` command cleans up any that might have been
            # missed.

            # This unfortunately results in executing a statement for each
            # object unlinked, but it should still be faster than
            # running a DELETE and firing the trigger for each row.

            # We need to take care to do this in chunks, though,
            # so as to not allocate all the locks available on the server
            # (which results in a confusing "out of shared memory" error).
            # See https://github.com/zodb/relstorage/issues/468
            # For that reason, even a simple DELETE which *does* fire the
            # trigger isn't good enough.

            # XXX: When we run on PostgreSQL 11 and above only, we can
            # probably use a complete server-side ``DO`` loop, because
            # on that version you can COMMIT in the loop.

            # We first copy the blob_chunk info into the temporary
            # table so that we can DELETE from it as we go and keep
            # track of our chunks. Once we start unlinking the blobs,
            # we can't delete from the blob_chunk table without
            # breaking the existing trigger that wants to unlink. And
            # since we're spanning transactions here, we don't want to
            # disable the trigger.
            cursor.execute("""
            CREATE TEMPORARY TABLE IF NOT EXISTS temp_zap_chunk(
                chunk oid primary key
            );
            """)
            cursor.execute("""
            INSERT INTO temp_zap_chunk
            SELECT DISTINCT chunk
            FROM blob_chunk;
            """)
            while True:
                cursor.execute("""
                DO $$BEGIN
                    PERFORM lo_unlink(chunk)
                    FROM temp_zap_chunk
                    ORDER BY chunk
                    LIMIT 1000;

                    DELETE FROM temp_zap_chunk
                    WHERE chunk IN (
                        SELECT chunk
                        FROM temp_zap_chunk
                        ORDER BY chunk
                        LIMIT 1000
                    );
                END $$ LANGUAGE plpgsql;

                SELECT MIN(chunk)
                FROM temp_zap_chunk;
                """)
                cnt, = cursor.fetchone()
                logger.info("Unlinked 1000 blob chunks. More? %s", bool(cnt))
                self.driver.commit(conn)
                if not cnt:
                    # Now we must truncate because the trigger won't let
                    # deletes happen.
                    cursor.execute('TRUNCATE TABLE blob_chunk;')
                    break
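
# A hypothetical usage sketch: in practice the adapter constructs and drives
# the installer; something roughly like the following, assuming ``options``,
# ``connmanager``, ``runner``, and ``locker`` are already built:
#
#   installer = PostgreSQLSchemaInstaller(options, connmanager, runner, locker)
#   installer.prepare()  # create tables, sequences, procedures, triggers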