Tuesday, June 22, 2010

Tom Kyte's Pro*C "array_flat" CSV file Unloader - with enhancements

Hi all,

It's been a long time since I've posted (I've been a lazy blogger).

Recently I had the need to unload a CSV file from a rather complex query in Oracle 10g Release 2. I just could not find a SQL*PLus solution that suited me, so I stumbled upon Tom Kyte's "array_flat" solution - found here in its orginal glory:
http://asktom.oracle.com/pls/asktom/f?p=100:11:0::::P11_QUESTION_ID:459020243348

Tom's solution uses Pro*C code to connect to Oracle and then dump the output of a query to the stdout, while dumping the query text and column headings to the stderr.

Tom's solution of course is the bomb, but I wanted to improve upon it by adding the ability to change the delimiter character, and optionally specify an enclosure character (such as double-quote for example), and also have the ability to designate what should be output in the case of NULL values.

I'm VERY new to Pro*C programming, and I suspect others are too - so I'm posting in hopes that this helps some other poor souls that are Pro*C newbs also.

Ok - so here's the modified code - shamelessly taken from Tom's code found in the URL above - and modified to add the enhancements mentioned earlier:

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

#define MAX_VNAME_LEN 30
#define MAX_INAME_LEN 30

static char * USERID = NULL;
static char * SQLSTMT = NULL;
static char * ARRAY_SIZE = "10";
static char * DELIMITER = ",";
static char * ENCLOSURE = "\"";
static char * NULL_STRING = NULL;

#define vstrcpy( a, b ) \
(strcpy( a.arr, b ), a.len = strlen( a.arr ), a.arr)

EXEC SQL INCLUDE sqlca;
EXEC SQL INCLUDE sqlda;

extern SQLDA *sqlald();
extern void sqlclu();



static void die( char * msg )
{
fprintf( stderr, "%s\n", msg );
exit(1);
}


/*
this array contains a default mapping
I am using to constrain the
lengths of returned columns. It is mapping,
for example, the Oracle
NUMBER type (type code = 2) to be 45 characters
long in a string.
*/

static int lengths[] =
{ -1, 0, 45, 0, 0, 0, 0, 0, 2000, 0, 0,
18, 25, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 512, 2000 };


static void process_parms( argc, argv )
int argc;
char * argv[];
{
int i;

for( i = 1; i < argc; i++ )
{
if ( !strncmp( argv[i], "userid=", 7 ) )
USERID = argv[i]+7;
else
if ( !strncmp( argv[i], "sqlstmt=", 8 ) )
SQLSTMT = argv[i]+8;
else
if ( !strncmp( argv[i], "arraysize=", 10 ) )
ARRAY_SIZE = argv[i]+10;
else
if ( !strncmp( argv[i], "delimiter=", 10 ) )
DELIMITER = argv[i]+10;
else
if ( !strncmp( argv[i], "enclosure=", 10 ) )
ENCLOSURE = argv[i]+10;
else
if ( !strncmp( argv[i], "null_string=", 12 ) )
NULL_STRING = argv[i]+12;
else
{
fprintf( stderr,
"usage: %s %s %s %s %s %s\n",
argv[0],
"userid=xxx/xxx sqlstmt=query ",
"arraysize=<NN> ",
"delimiter=x ",
"enclosure=x ",
"null_string=x ");
exit(1);
}
}
if ( USERID == NULL || SQLSTMT == NULL )
{
fprintf( stderr,
"usage: %s %s %s %s %s %s\n",
argv[0],
"userid=xxx/xxx sqlstmt=query ",
"arraysize=<NN> ",
"delimiter=x ",
"enclosure=x ",
"null_string=x ");
exit(1);
}
}

static void sqlerror_hard()
{
EXEC SQL WHENEVER SQLERROR CONTINUE;

fprintf(stderr,"\nORACLE error detected:");
fprintf(stderr,"\n% .70s \n", sqlca.sqlerrm.sqlerrmc);

EXEC SQL ROLLBACK WORK RELEASE;
exit(1);
}



static SQLDA * process_1(char * sqlstmt, int array_size, char * delimiter, char * enclosure )
{
SQLDA * select_dp;
int i;
int j;
int null_ok;
int precision;
int scale;
int size = 10;

fprintf( stderr, "Unloading '%s'\n", sqlstmt );
fprintf( stderr, "Array size = %d\n", array_size );


EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();
EXEC SQL PREPARE S FROM :sqlstmt;
EXEC SQL DECLARE C CURSOR FOR S;

if ((select_dp = sqlald(size,MAX_VNAME_LEN,MAX_INAME_LEN))
== NULL )
die( "Cannot allocate memory for select descriptor." );

select_dp->N = size;
EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;
if ( !select_dp->F ) return NULL;

if (select_dp->F < 0)
{
size = -select_dp->F;
sqlclu( select_dp );
if ((select_dp =
sqlald (size, MAX_VNAME_LEN, MAX_INAME_LEN))
== NULL )
die( "Cannot allocate memory for descriptor." );
EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp;
}
select_dp->N = select_dp->F;

for (i = 0; i < select_dp->N; i++)
select_dp->I[i] = (short *) malloc(sizeof(short) *
array_size );

for (i = 0; i < select_dp->F; i++)
{
sqlnul (&(select_dp->T[i]),
&(select_dp->T[i]), &null_ok);
if ( select_dp->T[i] <
sizeof(lengths)/sizeof(lengths[0]) )
{
if ( lengths[select_dp->T[i]] )
select_dp->L[i] = lengths[select_dp->T[i]];
else select_dp->L[i] += 5;
}
else select_dp->L[i] += 5;

select_dp->T[i] = 5;
select_dp->V[i] = (char *)malloc( select_dp->L[i] *
array_size );

for( j = MAX_VNAME_LEN-1;
j > 0 && select_dp->S[i][j] == ' ';
j--);
fprintf (stderr,
"%s%.*s", i?",":"", j+1, select_dp->S[i]);
}
fprintf( stderr, "\n" );


EXEC SQL OPEN C;
return select_dp;
}


static void process_2( SQLDA * select_dp, int array_size, char * delimiter, char * enclosure, char * null_string )
{
int last_fetch_count;
int row_count = 0;
short ind_value;
char * char_ptr;
int i,
j;

for ( last_fetch_count = 0;
;
last_fetch_count = sqlca.sqlerrd[2] )
{
EXEC SQL FOR :array_size FETCH C
USING DESCRIPTOR select_dp;

for( j=0; j < sqlca.sqlerrd[2]-last_fetch_count; j++ )
{
for (i = 0; i < select_dp->F; i++)
{
ind_value = *(select_dp->I[i]+j);
char_ptr = select_dp->V[i] +
(j*select_dp->L[i]);

printf( "%s%s%s%s", i?delimiter:"",
enclosure,
ind_value?null_string:char_ptr,
enclosure );
}
row_count++;
printf( "\n" );
}
if ( sqlca.sqlcode > 0 ) break;
}

sqlclu(select_dp);

EXEC SQL CLOSE C;

EXEC SQL COMMIT WORK;
fprintf( stderr, "%d rows extracted\n", row_count );
}



main( argc, argv )
int argc;
char * argv[];
{
EXEC SQL BEGIN DECLARE SECTION;
VARCHAR oracleid[50];
EXEC SQL END DECLARE SECTION;
SQLDA * select_dp;


process_parms( argc, argv );

/* Connect to ORACLE. */
vstrcpy( oracleid, USERID );

EXEC SQL WHENEVER SQLERROR DO sqlerror_hard();

EXEC SQL CONNECT :oracleid;
fprintf(stderr, "\nConnected to ORACLE as user: %s\n\n",
oracleid.arr);

EXEC SQL ALTER SESSION
SET NLS_DATE_FORMAT = 'DD-MON-YYYY HH24:MI:SS';

select_dp = process_1( SQLSTMT, atoi(ARRAY_SIZE), DELIMITER, ENCLOSURE );
process_2( select_dp , atoi(ARRAY_SIZE), DELIMITER, ENCLOSURE, NULL_STRING );

/* Disconnect from ORACLE. */
EXEC SQL COMMIT WORK RELEASE;
exit(0);
}



Ok - now you may ask - how the heck do I compile the thing? This is where I had my trouble as well.

Well - Here are the steps I took to get it to compile and working:
1) Save the code to a file - in my case I chose "unloader.pc"
2) Create a folder in your home directory on the UNIX server where you are going to compile this code. For example: "mkdir /home/phil/unloader".
3) FTP the file in ascii mode to the UNIX server directory created in step 2 above. Make sure that the server has the Oracle client installed. In my case I used Oracle 10g Release 2 client (10.2.0.3). Make sure you've run ". oraenv" to have all of your Oracle environment variables set up!
4) Copy the contents of directory $ORACLE_HOME/precomp/demo/proc/ to my directory created in step 2 - example unix command:
"cp $ORACLE_HOME/precomp/demo/proc/*.* /home/phil/unloader"
5) Modify your personal copy of the "demo_proc.mk" file - by adding our new Pro*C file to the list defined by the keyword "SAMPLES" - like so:


# SAMPLES is a list of the c proc sample programs.
# CPPSAMPLES is a list of the c++ proc sample programs.
# OBJECT_SAMPLES is a list of the proc sample programs using the new
# type features. You must install ott in addition to proc to run some
# of those samples.
SAMPLES=unloader sample1 sample2 sample3 sample4 sample6 sample7 sample8 \
sample9 sample10 sample11 sample12 oraca sqlvcp cv_demo \
ansidyn1 ansidyn2 cpdemo1 cpdemo2 scdemo1 scdemo2



6) Set your "LD_LIBRARY_PATH" environment variable to include: "$ORACLE_HOME/lib" (if it doesn't already have it) - you can do that like this:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib

7) Now you're ready to compile - just issue this command:
make -f ./demo_proc.mk unloader

After it completes (you may get warnings - but this is ok) - you should have an executable file called "unloader" with no extension. If you have this file - your compile worked - and you should be ready for execution.
-------------------------------

Now for some execution examples:

Example 1 - Unload a simple query against the dual table:

[phil@unixserver:/home/phil/unloader]
$ ./unloader userid="scott/tiger@db" sqlstmt="select * from dual" arraysize=100 \
> delimiter="|" enclosure='"' null_string="(null)"

Connected to ORACLE as user: scott/tiger@db

Unloading 'select * from dual'
Array size = 100
DUMMY
"X"
1 rows extracted



Example 2 - Unload a bigger SQL Query from a file - send the data to one file - and the query and column headings to another file:
Step 1) Store our query in a file - like so:
echo "SELECT 1 AS num_field, 'ABC' AS text_field FROM dual" > ./query.sql
Step 2) Run this command:

[phil@unixserver:/home/phil/unloader]
$ ./unloader userid="scott/tiger@db" sqlstmt="`cat ./query.sql`" \
> arraysize=100 delimiter="|" enclosure='"' null_string="(null)" 1>./stdout.csv 2>./stderr.txt


Here is the output from stdout.csv:
[phil@unixserver:/home/phil/unloader]
$ cat stdout.csv
"1"|"ABC"


Here is the output from stderr.txt:
[phil@unixserver:/home/phil/unloader]
$ cat stderr.txt

Connected to ORACLE as user: scott/tiger@db

Unloading 'SELECT 1 AS num_field, 'ABC' AS text_field FROM dual'
Array size = 100
NUM_FIELD,TEXT_FIELD
1 rows extracted



-------------------

I hope this has been useful. I know that Pro*C can be intimidating - hopefully this example can make it a little easier for you. I am still very new, and I know there is probably a WAY better way to compile the Pro*C code - but hey - this worked!

This solution was tested on an HP-UX 64-bit Itanium server - and worked with both Oracle 9.2 and 10.2 Databases.

Good luck, and God bless!.

Phil