Wednesday, May 7, 2014

Oracle table (or query) CHECKSUM function

For a long time now, I've wanted table (or query) checksum functionality like the SQL Server "CHECKSUM_AGG" function (detailed here: http://technet.microsoft.com/en-us/library/ms188920.aspx).

I couldn't find anything "built-in" (if you do know of such a function, please comment below) - so I wrote my own.

Please note - I don't use MD5 - I use ORA_HASH - but I think this is "close enough for government work".  I multiply the output of ORA_HASH by the base-10 log of the column number in the result set (+1) so that:

SELECT 'X' AS x, 'Y' AS y FROM dual

and

SELECT 'Y' AS x, 'X' AS y FROM dual;

Do not have the same checksums...

Anyway - here goes - hopefully this works well for you:

 CREATE OR REPLACE FUNCTION marketplace."GET_QUERY_CHECKSUM" (
   p_query                   IN CLOB,
   p_output_checksum_query   IN INTEGER DEFAULT 0,
   p_use_result_cache_hint   IN INTEGER DEFAULT 0,
   p_primary_key_column_name IN all_tab_columns.column_name%TYPE DEFAULT NULL
)
   RETURN NUMBER
   AUTHID CURRENT_USER
IS
   /***********************************************************************************************
   Package :           GET_QUERY_CHECKSUM
   Author  :           Philip Moore
   Creation Date :     05-FEB-2014 A.D.
   Version :           1.0
   Purpose: This function will return a checksum for a query which is passed in.  It can be used
            to quickly determine if the result set of the query had changed.  There is a low
            risk for collisions (false positive match) - however the risk is quite low.
   ------------------------------------------------------------------------------------------------
   Modification History
   Date:            Version:     Modified by:        Modifications:
   ----------       --------     ------------        --------------------------------------------------
    ***********************************************************************************************/
   -- Variables
   l_theCursor                INTEGER DEFAULT DBMS_SQL.open_cursor;
   l_descTbl                  DBMS_SQL.desc_tab;
   l_colCnt                   NUMBER;
   l_return                   NUMBER;
   l_hash_column_sql          CLOB;
   l_this_column_sql          VARCHAR2 (32767);
   l_primary_key_column_sql   VARCHAR2 (32767);
   l_sql_stmt                 CLOB;
   -- Private-scope procedure
   PROCEDURE cleanup
   IS
   BEGIN
      IF DBMS_SQL.is_open (c => l_theCursor)
      THEN
         DBMS_SQL.close_cursor (c => l_theCursor);
      END IF;
   END cleanup;
BEGIN
   -- Parse the Query
   DBMS_SQL.parse (c               => l_theCursor,
                   STATEMENT       => p_query,
                   language_flag   => DBMS_SQL.native);
   -- Get the column names, data types, etc..
   DBMS_SQL.describe_columns (c         => l_theCursor,
                              col_cnt   => l_colCnt,
                              desc_t    => l_descTbl);
   -- Close our cursor
   DBMS_SQL.close_cursor (c => l_theCursor);
  -- Build our HASH SQL...
  <>
   FOR i IN 1 .. l_colCnt
   LOOP
      l_this_column_sql :=
      ' + ('
         || TO_CHAR (LOG (10, i + 1))
         || ' * '
         || CASE WHEN l_descTbl (i).col_null_ok THEN 'COALESCE (' END
         || 'ORA_HASH ('
         || DBMS_ASSERT.enquote_name (str => l_descTbl (i).col_name)
         || ')'
         || CASE
               WHEN l_descTbl (i).col_null_ok
               THEN
                     ', ORA_HASH ('
                  || DBMS_ASSERT.enquote_literal (
                        str   => l_descTbl (i).col_name)
                  || '))'
            END
         || ')';
      l_hash_column_sql :=
            l_hash_column_sql
         || l_this_column_sql;

      IF (    p_primary_key_column_name IS NOT NULL
          AND l_descTbl (i).col_name = p_primary_key_column_name
         )
         OR
         (    p_primary_key_column_name IS NULL
          AND i = 1
         ) THEN
         l_primary_key_column_sql := LTRIM (l_this_column_sql, ' +');
      END IF;
   END LOOP column_loop;
   l_sql_stmt :=
         'SELECT '
      || CASE
            WHEN p_use_result_cache_hint = 1 THEN '/*+ result_cache */ '
         END
      || 'COUNT (*) + ROUND (COALESCE (SUM ('
      || l_primary_key_column_sql
      || ' * ('
      || LTRIM (l_hash_column_sql, ' +')
      || ')), 0), 0) AS hash_total FROM ('
      || p_query
      || ')';
   IF (p_output_checksum_query = 1)
   THEN
      DBMS_OUTPUT.PUT_LINE (SUBSTR (l_sql_stmt, 1, 32767));
   END IF;
   EXECUTE IMMEDIATE l_sql_stmt INTO l_return;
   RETURN l_return;
EXCEPTION
   WHEN OTHERS
   THEN
      cleanup ();
      RAISE;
END get_query_checksum;
/




Demo of usage:
SELECT get_query_checksum (p_query  => 'SELECT * FROM all_objects') AS checksum
FROM dual;