旧文重发,原文刊载于2006年6月8日。
近日工作需要,要大批量导出数据,开始用SQL*Plus的SPOOL功能。结果老是提示“xrealloc: cannot reallocate XXX bytes”。而且每次发生的时间还不同,有时候是到文件1G就出错,有时候是2G多出错。怀疑是32位Linux问题,换到Solaris 8上还是如故。客户端都是9201的,到Metalink上搜索没有结果。郁闷之余到Oracle-l上发了个邮件,收到一位高手的指点,说可能是内存泄漏问题,SQL*Plus的开发人员估计也不会想到有人会导出如此大量数据。在邮件往复之间,那位高手提醒可以用UTL_FILE来导出。忽然想起Tom的两本书里都有提到用PL/SQL来导出,于是翻出书里的那个网址 http://asktom.oracle.com/~tkyte/flat/index.html ,进去一看,PL/SQL的效率比SQL*Plus要高,缺点是生成的文件必须在主机上。SQL*Plus可以调整arraysize来提高效率(当然经我实践大数据量时还有内存泄漏之虞)。Pro*C是高效的方法,缺点是需要写程序、编译。
硬着头皮看Pro*C咯,好在Tom给了原始代码,抓下来放到一台Linux的机器上proc报错,应该是proc配置include库的问题,找到一个makefile,倒是没啥问题了,但是最后链接失败,报无法识别“sqlca”。到网上一搜,和我一样晕的人还有,要修改代码,多include进一个头文件(头文件名在网页转载时被过滤掉了,按报错信息推断应为<sqlca.h>),但是看Tom的帖子似乎没提到这个问题,不知道为什么。
下面是修改后的源代码,我顺便在里面修改了会话的一些参数,以处理中文和时间格式:
#include #include #include #include #define MAX_VNAME_LEN 30 #define MAX_INAME_LEN 30 static char * USERID = NULL; static char * SQLSTMT = NULL; static char * ARRAY_SIZE = "10"; #define vstrcpy( a, b ) \ (strcpy( a.arr, b ), a.len = strlen( a.arr ), a.arr) EXEC SQL INCLUDE SQLCA; EXEC SQL INCLUDE sqlda; extern SQLDA *sqlald(); extern void sqlclu(); static void die( char * msg ) { fprintf( stderr, "%s\n", msg ); exit(1); } /* this array contains a default mapping I am using to constrain the lengths of returned columns. It is mapping, for example, the Oracle NUMBER type (type code = 2) to be 45 characters long in a string. */ static int lengths[] = { -1, 0, 45, 0, 0, 0, 0, 0, 2000, 0, 0, 18, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 512, 2000 }; static void process_parms( argc, argv ) int argc; char * argv[]; { int i; for( i = 1; i < argc; i++ ) { if ( !strncmp( argv[i], "userid=", 7 ) ) USERID = argv[i]+7; else if ( !strncmp( argv[i], "sqlstmt=", 8 ) ) SQLSTMT = argv[i]+8; else if ( !strncmp( argv[i], "arraysize=", 10 ) ) ARRAY_SIZE = argv[i]+10; else { fprintf( stderr, "usage: %s %s %s\n", argv[0], "userid=xxx/xxx sqlstmt=query ", "arraysize=\n" ); exit(1); } } if ( USERID == NULL || SQLSTMT == NULL ) { fprintf( stderr, "usage: %s %s %s\n", argv[0], "userid=xxx/xxx sqlstmt=query ", "arraysize=\n" ); exit(1); } } static void sqlerror_hard() { EXEC SQL WHENEVER SQLERROR CONTINUE; fprintf(stderr,"\nORACLE error detected:"); fprintf(stderr,"\n% .70s \n", sqlca.sqlerrm.sqlerrmc); EXEC SQL ROLLBACK WORK RELEASE; exit(1); } static SQLDA * process_1(char * sqlstmt, int array_size ) { SQLDA * select_dp; int i; int j; int null_ok; int precision; int scale; int size = 10; fprintf( stderr, "Unloading '%s'\n", sqlstmt ); fprintf( stderr, "Array size = %d\n", array_size ); EXEC SQL WHENEVER SQLERROR DO sqlerror_hard(); EXEC SQL PREPARE S FROM :sqlstmt; EXEC SQL DECLARE C CURSOR FOR S; if ((select_dp = sqlald(size,MAX_VNAME_LEN,MAX_INAME_LEN)) == NULL ) die( "Cannot allocate memory for 
select descriptor." ); select_dp->N = size; EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp; if ( !select_dp->F ) return NULL; if (select_dp->F < 0) { size = -select_dp->F; sqlclu( select_dp ); if ((select_dp = sqlald (size, MAX_VNAME_LEN, MAX_INAME_LEN)) == NULL ) die( "Cannot allocate memory for descriptor." ); EXEC SQL DESCRIBE SELECT LIST FOR S INTO select_dp; } select_dp->N = select_dp->F; for (i = 0; i < select_dp->N; i++) select_dp->I[i] = (short *) malloc(sizeof(short) * array_size ); for (i = 0; i < select_dp->F; i++) { sqlnul (&(select_dp->T[i]), &(select_dp->T[i]), &null_ok); if ( select_dp->T[i] < sizeof(lengths)/sizeof(lengths[0]) ) { if ( lengths[select_dp->T[i]] ) select_dp->L[i] = lengths[select_dp->T[i]]; else select_dp->L[i] += 5; } else select_dp->L[i] += 5; select_dp->T[i] = 5; select_dp->V[i] = (char *)malloc( select_dp->L[i] * array_size ); for( j = MAX_VNAME_LEN-1; j > 0 && select_dp->S[i][j] == ' '; j--); fprintf (stderr, "%s%.*s", i?",":"", j+1, select_dp->S[i]); } fprintf( stderr, "\n" ); EXEC SQL OPEN C; return select_dp; } static void process_2( SQLDA * select_dp, int array_size ) { int last_fetch_count; int row_count = 0; short ind_value; char * char_ptr; int i, j; for ( last_fetch_count = 0; ; last_fetch_count = sqlca.sqlerrd[2] ) { EXEC SQL FOR :array_size FETCH C USING DESCRIPTOR select_dp; for( j=0; j < sqlca.sqlerrd[2]-last_fetch_count; j++ ) { for (i = 0; i < select_dp->F; i++) { ind_value = *(select_dp->I[i]+j); char_ptr = select_dp->V[i] + (j*select_dp->L[i]); printf( "%s%s", i?",":"", ind_value?"(null)":char_ptr ); } row_count++; printf( "\n" ); } if ( sqlca.sqlcode > 0 ) break; } sqlclu(select_dp); EXEC SQL CLOSE C; EXEC SQL COMMIT WORK; fprintf( stderr, "%d rows extracted\n", row_count ); } main( argc, argv ) int argc; char * argv[]; { EXEC SQL BEGIN DECLARE SECTION; VARCHAR oracleid[50]; EXEC SQL END DECLARE SECTION; SQLDA * select_dp; process_parms( argc, argv ); /* Connect to ORACLE. 
*/ vstrcpy( oracleid, USERID ); EXEC SQL WHENEVER SQLERROR DO sqlerror_hard(); EXEC SQL CONNECT :oracleid; fprintf(stderr, "\nConnected to ORACLE as user: %s\n\n", oracleid.arr); EXEC SQL ALTER SESSION SET NLS_DATE_FORMAT = 'YYYYMMDDHH24:MI:SS'; EXEC SQL ALTER SESSION SET NLS_LANGUAGE = "SIMPLIFIED CHINESE"; select_dp = process_1( SQLSTMT, atoi(ARRAY_SIZE) ); process_2( select_dp , atoi(ARRAY_SIZE)); /* Disconnect from ORACLE. */ EXEC SQL COMMIT WORK RELEASE; exit(0); }
顺便附上我的makefile,在RHEL AS3上用的:
# Makefile for the Pro*C "unload" utility (written for RHEL AS3 / gcc 3.2.3).
# Requires ORACLE_HOME to be set and the proc precompiler on PATH.
CC=gcc
LD=ld
OBJ=obj/

########## Pro*C precompiler options ###########
# The second sys_include path is specific to the gcc shipped with RHEL AS3;
# adjust it to the local compiler's private include directory.
PROCFLAGS=unsafe_null=yes dynamic=ansi mode=oracle dbms=V8 parse=full \
	release_cursor=no sqlcheck=SEMANTICS ireclen=512 include=./. \
	sys_include=/usr/include sys_include=/usr/lib/gcc-lib/i386-redhat-linux/3.2.3/include \
	def_sqlcode=yes

INCL=-I/usr/include -I./. -I${ORACLE_HOME}/precomp/public
BIN_LIBS= -lc -L${ORACLE_HOME}/lib -lclntsh

########## Build rules ################
.SUFFIXES: .pc .o .c

.c.o:
	${CC} -g -c $*.c ${INCL}

# Precompile .pc to .c, compile it, then drop the generated intermediates.
.pc.o:
	proc $(PROCFLAGS) iname=$*.pc
	$(CC) -g -c $*.c ${INCL}
	rm -f $*.c $*.lis

# all/clean are commands, not files.
.PHONY: all clean

all:unload

unload:unload.o
	rm -f $@
	${CC} -o $@ unload.o \
	$(BIN_LIBS)

#########################################################
# Build common object files
#########################################################

######## Clean build outputs ###########################
# Also removes the linked binary and any generated unload.c left behind by
# an interrupted build.
clean:
	rm -f *.o *.lis unload unload.c
编译以后生成unload程序,执行如下:
unload userid=XXX/XXX sqlstmt='select * from dual' arraysize=100 > XXX.DAT
这样DAT文件里就是数据了。
经我测试,在数据库主机上,30分钟导出了7GB的文件,强啊!效率上可以调整的一个是arraysize参数,这个一般100-200都可以,更大的除非你IO超强,不然也没有意义;另一个是所谓并行,其实这个C程序也就是执行SELECT语句,因此将原表改为PARALLEL就可以充分利用系统资源了^_^。