///////////////////////////////////////////////////////////////////////////////
//                                                         
// CegoBufferPool.cc
// -----------------
// Cego buffer pool implementation module
//
// Design and Implementation by Bjoern Lemke               
//     
// (C)opyright 2000-2025 Bjoern Lemke
//
// IMPLEMENTATION MODULE
//
// Class: CegoBufferPool
// 
// Description: The buffer pool management class
//
// Status: CLEAN
//
///////////////////////////////////////////////////////////////////////////////

// LFC INCLUDES
#include <lfcbase/Exception.h>
#include <lfcbase/CommandExecuter.h>
#include <lfcbase/Datetime.h>
#include <lfcbase/Sleeper.h>
#include <lfcbase/ThreadLock.h>

// CEGO INCLUDES
#include "CegoBufferPool.h"
#include "CegoDefs.h"
#include "CegoXMLdef.h"
#include "CegoCheckpointDump.h"

// POSIX INCLUDES
#include <string.h>
#include <stdlib.h>

// Size of the per-segment pool header, rounded up to the next multiple of BUPMNG_ALIGNMENT.
// The replacement list is fully parenthesized, so the macro behaves like a single
// value inside any surrounding expression ( casts, division, subtraction ).
#define BUFFERPOOLHEAD_SIZE ((((sizeof(BufferPoolHead)-1)/BUPMNG_ALIGNMENT)+1)*BUPMNG_ALIGNMENT)
// Size of the per-page buffer header, rounded up to the next multiple of BUPMNG_ALIGNMENT.
// Note that this may be larger than sizeof(BufferHead).
#define BUFFERHEAD_SIZE ((((sizeof(BufferHead)-1)/BUPMNG_ALIGNMENT)+1)*BUPMNG_ALIGNMENT)

// One log manager lock per tableset id, initialized in the CegoBufferPool constructor
// and used by logIt to serialize log writes for a tableset
static ThreadLock _lmLock[TABMNG_MAXTABSET];
// Declared here, defined elsewhere; handed to ThreadLock::init in the constructor
extern bool __lockStatOn;

// Sets up the buffer pool manager on top of the log manager base class.
// No pool memory is allocated at this point ( see initPool ), the pool pointer is
// cleared, all statistic counters start at zero and the file-level log manager
// locks are initialized for every tableset id.
CegoBufferPool::CegoBufferPool(const Chain& xmlDef, const Chain& logFile, const Chain& progName) :  CegoLogManager(xmlDef, logFile, progName)
{
    // the pool itself is created later by initPool
    _pBufPool = 0;

    // fix and disk statistics
    _fixCount = 0;
    _fixTry = 0;
    _numDiskRead = 0;
    _numDiskWrite = 0;
    _avgReadDelay = 0;
    _avgWriteDelay = 0;

    // number of checkpoints written so far
    _cpCount = 0;

    int lockIdx = 0;
    while ( lockIdx < TABMNG_MAXTABSET )
    {
	ThreadLock& lmLock = _lmLock[lockIdx];
	lmLock.init(LCKMNG_LOCKWAITDELAY, __lockStatOn);
	lmLock.setId( Chain("LMLOCK") + Chain("-") + Chain(lockIdx));
	lockIdx++;
    }

    // pool start and statistics start share the same timestamp
    Datetime now;
    _poolStart = now.asLong();
    _statStart = now.asLong();

    _modId = getModId("CegoBufferPool");
}

// Destructor with an empty body: the pool memory is not released here,
// removePool() is the method that frees the allocated segments.
CegoBufferPool::~CegoBufferPool()
{
}

void CegoBufferPool::initPool(unsigned long long numSegment, unsigned long long numPages)
{
#ifdef DEBUG
    log(_modId, Logger::DEBUG, Chain("Reading xml def ..."));
#endif

    Chain dbName = getDbName();
    int pageSize = getPageSize();
    _maxFixTries = getMaxFixTries();
    _maxPageDelete = getMaxPageDelete();
    
    _dbName = dbName;
    _numSegment = numSegment;
    _numPages = numPages;
    _pageSize = pageSize;

    if (_pBufPool == 0)
    {
	log(_modId, Logger::NOTICE, Chain("Allocating ") + Chain(_numSegment) + Chain(" buffer pool segments ( each ") +  Chain(_numPages) + Chain(" pages ) ..."));	
	_pBufPool = (void**)malloc(_numSegment * sizeof(void*));
	if ( _pBufPool == NULL )
	{
	    throw Exception(EXLOC, "Cannot initialize pool");
	}
	
	for ( unsigned long long i=0; i<_numSegment; i++)
	{
	    _pBufPool[i] = malloc(_numPages * (BUFFERHEAD_SIZE + _pageSize) + BUFFERPOOLHEAD_SIZE);

	    if ( _pBufPool[i] == NULL )
	    {
		throw Exception(EXLOC, "Cannot initialize pool");
	    }
	    
	    log(_modId, Logger::NOTICE, Chain("Initializing buffer pool pages for segment ") + Chain(i) + Chain(" ..."));
	
	    void *base = (void*)_pBufPool[i];
	    void *ptr = (void*)_pBufPool[i];
	    
	    BufferPoolHead bph;
	    bph.numPages=_numPages;

	    memcpy(base, &bph, sizeof(BufferPoolHead));
	    
	    ptr = (void*)(BUFFERPOOLHEAD_SIZE + (long long)base);	
	    
	    BufferHead ibh;
	    ibh.isOccupied = NOT_OCCUPIED;
	    ibh.isDirty = 0;
	    ibh.numFixes = 0;
	    ibh.tabSetId = 0;
	    ibh.pageId = 0;
	    ibh.fixStat = 0;
	    ibh.numUsage = 0;
	    
	    for (unsigned long long i = 0; i<_numPages; i++)
	    {
		memcpy(ptr, &ibh, BUFFERHEAD_SIZE );
		ptr = (void*)((long long)ptr + BUFFERHEAD_SIZE + _pageSize);
	    }
	    
	    log(_modId, Logger::NOTICE, Chain("Buffer pool initialized"));
	}
    }
    else
    {
	throw Exception(EXLOC, "Buffer pool already created");
    }    
}

// Releases all pool segments and the segment table.
// Calling this method without an allocated pool is a no-op.
void CegoBufferPool::removePool()
{
    // nothing to do if no pool has been allocated
    if ( _pBufPool == 0 )
	return;

    log(_modId, Logger::NOTICE, Chain("Removing pool ..."));

    unsigned long long seg = 0;
    while ( seg < _numSegment )
    {
	free( _pBufPool[seg] );
	seg++;
    }

    // finally drop the segment table itself and mark the pool as not created
    free( _pBufPool );
    _pBufPool = 0;

    log(_modId, Logger::NOTICE, Chain("Pool removed"));
}

// Returns the database name which was stored by initPool
// ( value of getDbName() at pool initialization time ).
Chain CegoBufferPool::getDBName()
{
    return _dbName;
}

// Fixes ( pins ) the page with the given page id in the buffer pool and attaches it to bp.
//
// The slot search is done in segment calcSegment(pageId), starting at slot calcHash(pageId)
// and probing at most _maxFixTries consecutive slots ( with wrap around ) per step :
//
//   Step I   : look for a slot which already contains the page
//   Step II  : look for a not occupied slot and read the page from disk into it; on the way,
//              the unfixed slot with the lowest fixStat is remembered as replacement candidate
//              and its slot lock is kept
//   Step III : if no free slot was found, the remembered candidate is replaced
//              ( dirty WRITE_ON_DIRTY pages are written to disk first )
//
// If no slot could be found, a checkpoint is forced ( numTry == 0 : for the given tableset,
// numTry == 1 : for all other online tablesets ) and the fix is tried again.
//
// bp          : buffer page handle which receives page pointer, page size and page id
// tabSetId    : tableset used for the forced checkpoint
// pageId      : id of the page to fix
// m           : fix mode, determines the occupation state of the slot
// pLockHandle : lock handler used for the slot locks
// numTry      : retry counter, callers outside of this method use the default declared in the header
//
// Throws an Exception if no pool exists, if a slot lock cannot be acquired, if the page
// cannot be read or if no buffer is available after all retries ( numTry >= 2 ).
void CegoBufferPool::bufferFix(CegoBufferPage &bp, int tabSetId, PageIdType pageId, FixMode m, CegoLockHandler *pLockHandle, int numTry)
{
    _fixCount++;

    // statistics are reset periodically
    if ( _fixCount % BUPMNG_STATSPERIOD == 0 )
    {
	resetStats();
    }

    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    int segid = calcSegment(pageId);

    void *base = (void*)_pBufPool[segid];

    int hashId = calcHash(pageId);

    void* bufPtr = (void*)((long long)base + (long long)BUFFERPOOLHEAD_SIZE + (long long )hashId * (long long)( BUFFERHEAD_SIZE + _pageSize ));

    bool isFixed = false;

    int numTries = 0;

    // Step I : searching for possible slot

    while ( isFixed == false && numTries < _maxFixTries )
    {
	_fixTry++;

	BufferHead* pBH = (BufferHead*)bufPtr;

	pLockHandle->lockBufferPool(hashId, CegoLockHandler::WRITE);

	if ( pBH->isOccupied != NOT_OCCUPIED && pBH->pageId == pageId)
	{
	    isFixed = true;

	    if ( m == CegoBufferPool::PERSISTENT )
	       pBH->isOccupied = PERSISTENT_OCCUPIED;
	    else if ( m == CegoBufferPool::NOSYNC && pBH->isOccupied == WRITE_ON_SYNC)
	       pBH->isOccupied = WRITE_ON_DIRTY;
	    else
	    {
		pBH->isOccupied = WRITE_ON_SYNC;
	    }

	    pBH->numFixes++;
	    pBH->fixStat++;
	    pBH->numUsage++;

	    pLockHandle->unlockBufferPool(hashId);
	}
	else
	{
	    pLockHandle->unlockBufferPool(hashId);

	    // probe the next slot, wrapping around at the end of the segment
	    hashId =  ( hashId + 1 ) % _numPages;
	    bufPtr = (void*)( (long long)hashId * (long long)( BUFFERHEAD_SIZE + _pageSize ) + (long long)base + (long long)BUFFERPOOLHEAD_SIZE );
	    numTries++;
	}
    }

    // Step II : page is not in buffercache yet, searching free and not occupied slot

    void* minFixStatBufPtr = 0;
    int minHashId = -1;

    if ( ! isFixed )
    {
	numTries = 0;
	hashId = calcHash(pageId);
	bufPtr = (void*)((long long)base + (long long)BUFFERPOOLHEAD_SIZE + (long long)hashId * (long long)( BUFFERHEAD_SIZE + _pageSize ));

	int minFixStat = -1;

	while ( ! isFixed && numTries < _maxFixTries )
	{
	    try
	    {
		pLockHandle->lockBufferPool(hashId, CegoLockHandler::WRITE);
	    }
	    catch ( Exception e )
	    {
		// the lock of the replacement candidate is still held, release it before giving up
		if ( minHashId != -1 )
		    pLockHandle->unlockBufferPool(minHashId);
		throw Exception(EXLOC, "Cannot lock bufferpool", e);
	    }

	    BufferHead* pBH = (BufferHead*)bufPtr;

	    if ( pBH->isOccupied == NOT_OCCUPIED )
	    {
		if ( m == CegoBufferPool::PERSISTENT )
		    pBH->isOccupied = PERSISTENT_OCCUPIED;
		else if ( m == CegoBufferPool::SYNC )
		    pBH->isOccupied = WRITE_ON_SYNC;
		else
		    pBH->isOccupied = WRITE_ON_DIRTY;

		pBH->pageId = pageId;
		pBH->isDirty = 0;
		pBH->numFixes = 1;

		try
		{
		    _numDiskRead++;
		    _diskReadTimer.start();
		    readPage(pageId, pBH->tabSetId, pBH->fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE), pLockHandle);
		    _diskReadTimer.stop();
		    _avgReadDelay = ( _diskReadTimer.getSum() /  (_numDiskRead+(unsigned long long)1)  ) / 1000;
		}
		catch ( Exception e )
		{
		    // the page has not been loaded, so the slot must not stay marked as
		    // occupied and fixed, otherwise it would be pinned forever and a later
		    // fix would find a page which is not actually in the buffer
		    pBH->isOccupied = NOT_OCCUPIED;
		    pBH->numFixes = 0;
		    pBH->pageId = 0;

		    pLockHandle->unlockBufferPool(hashId);
		    if ( minHashId != -1 )
			pLockHandle->unlockBufferPool(minHashId);
		    throw Exception(EXLOC, Chain("Cannot read page to buffer"), e);
		}

		pBH->fixStat++;
		pBH->numUsage++;

		isFixed = true;

		pLockHandle->unlockBufferPool(hashId);
		if ( minHashId != -1 )
		    pLockHandle->unlockBufferPool(minHashId);
	    }
	    else
	    {
		// occupied slot : it is a replacement candidate, if it is not fixed, either clean
		// ( and not persistent ) or of type WRITE_ON_DIRTY, and has a lower fixStat
		// than the candidate found so far
		if ( ( ( pBH->isOccupied != PERSISTENT_OCCUPIED && pBH->isDirty == 0 )
		       || pBH->isOccupied == WRITE_ON_DIRTY )
		     && pBH->numFixes == 0 && ( minFixStat > (int)pBH->fixStat || minFixStat == -1))
		{
		    minFixStatBufPtr = bufPtr;
		    minFixStat = pBH->fixStat;

		    // switch the kept lock from the previous candidate to the new one
		    if ( minHashId != -1 )
		    {
			pLockHandle->unlockBufferPool(minHashId);
		    }
		    minHashId = hashId;
		}
		else
		{
		    pLockHandle->unlockBufferPool(hashId);
		}

		hashId =  ( hashId + 1 ) % _numPages;

		bufPtr = (void*)( (long long)hashId * (long long)( BUFFERHEAD_SIZE + _pageSize ) + (long long)base + (long long)BUFFERPOOLHEAD_SIZE );
		numTries++;
	    }
	}

	// Step III : page is not in buffercache yet and all slots are occupied, using the occupied slot
	//            with lowest fix stat

	if ( ! isFixed && minFixStatBufPtr != 0)
	{
	    bufPtr = minFixStatBufPtr;

	    // minHashId is already locked

	    BufferHead *pBH = (BufferHead*)bufPtr;

	    if ( pBH->isOccupied == WRITE_ON_DIRTY && pBH->isDirty == 1 )
	    {
		_numDiskWrite++;
#ifdef DEBUG
		log(_modId, Logger::DEBUG, Chain("Async write of page [") + Chain(pBH->pageId) + Chain("]"));
#endif

		try
		{
		    _diskWriteTimer.start();
		    writePage(pBH->pageId, pBH->fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE), pLockHandle);
		    _diskWriteTimer.stop();
		}
		catch ( ... )
		{
		    // the slot header is still untouched, just release the kept slot lock
		    // and pass the original exception to the caller
		    pLockHandle->unlockBufferPool(minHashId);
		    throw;
		}

		_avgWriteDelay = ( _diskWriteTimer.getSum() /  (_numDiskWrite+(unsigned long long)1)  ) / 1000;
	    }

	    if ( m == CegoBufferPool::PERSISTENT )
		pBH->isOccupied = PERSISTENT_OCCUPIED;
	    else if ( m == CegoBufferPool::SYNC )
		pBH->isOccupied = WRITE_ON_SYNC;
	    else if ( m == CegoBufferPool::NOSYNC )
		pBH->isOccupied = WRITE_ON_DIRTY;

	    pBH->pageId = pageId;
	    pBH->isDirty = 0;
	    pBH->numFixes = 1;

	    try
	    {
		_numDiskRead++;
		_diskReadTimer.start();
		readPage(pageId, pBH->tabSetId, pBH->fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE), pLockHandle);
		_diskReadTimer.stop();

		_avgReadDelay = ( _diskReadTimer.getSum() /  (_numDiskRead+(unsigned long long)1) ) / 1000;
	    }
	    catch ( Exception e )
	    {
		// the replaced page was either clean or has been written above, the new page
		// has not been loaded : give the slot back instead of leaving it pinned
		pBH->isOccupied = NOT_OCCUPIED;
		pBH->numFixes = 0;
		pBH->pageId = 0;

		pLockHandle->unlockBufferPool(minHashId);
		throw Exception(EXLOC, Chain("Cannot read page to buffer"), e);
	    }
	    pBH->fixStat++;
	    pBH->numUsage++;

	    pLockHandle->unlockBufferPool(minHashId);

	    isFixed = true;
	}

	if ( ! isFixed )
	{
	    // no slot lock is held at this point, since no replacement candidate was found

	    if ( numTry >= 2 )
		throw Exception(EXLOC, "No more buffers available");

	    Chain tableSet = getTabSetName(tabSetId);
	    log(_modId, Logger::NOTICE, Chain("Forced checkpoint by bufferFix for tableset ") + tableSet);

	    if ( numTry == 0 )
		writeCheckPoint(tabSetId, true, Chain(), 0,  pLockHandle);

	    if ( numTry == 1 )
	    {
		// we force checkpoints for all active tableset,
		ListT<int> tsList = getOnlineTableSet();
		int* pTS = tsList.First();
		while ( pTS )
		{
		    if ( *pTS != tabSetId )
		    {
			Chain foreignTableSet = getTabSetName(*pTS);
			log(_modId, Logger::NOTICE, Chain("Forced checkpoint by bufferFix for foreign tableset ") + foreignTableSet);
			writeCheckPoint(*pTS, true, Chain(), 0,  pLockHandle);
		    }
		    pTS = tsList.Next();
		}
	    }

	    numTry++;

	    // try again
	    return bufferFix(bp, tabSetId, pageId, m, pLockHandle, numTry);
	}
    }

    // hand out the page, the page data starts behind the padded slot header
    bp.setPageSize(_pageSize);
    bp.setPagePtr((char*)((long long)bufPtr + BUFFERHEAD_SIZE));
    bp.setPageHead((CegoBufferPage::PageHead*)((long long)bufPtr + BUFFERHEAD_SIZE));
    bp.setPageId(pageId);
    bp.setFixed(true);
}

// Allocates a new page for the given tableset and fixes it in the buffer pool.
//
// If allocatePage returns a bitmap ( fbmSize > 0 ), it is written to the log via logBM
// and released afterwards. The new page is initialized as a TABLE page.
//
// bp          : buffer page handle which receives the fixed page
// tabSetId    : tableset the page is allocated for
// m           : fix mode, passed to bufferFix
// ft          : file type, passed to allocatePage
// pLockHandle : lock handler, passed to the called methods
// doAppend    : passed to allocatePage
//
// Throws an Exception if no pool exists; exceptions of the called methods are passed through.
void CegoBufferPool::emptyFix(CegoBufferPage &bp, int tabSetId, FixMode m, CegoFileHandler::FileType ft, CegoLockHandler *pLockHandle, bool doAppend)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    PageIdType pageId;

    unsigned* fbm = 0;
    int fbmSize=0;

    allocatePage(tabSetId, ft, pageId, pLockHandle, fbm, fbmSize, doAppend);

    if ( fbmSize > 0 )
    {
	try
	{
	    int fileId = getFileIdForPageId(pageId);
	    logBM(tabSetId, fileId, fbm, fbmSize, pLockHandle);
	}
	catch ( ... )
	{
	    // the bitmap is owned by this method, do not leak it if logging fails
	    delete[] fbm;
	    throw;
	}
	delete[] fbm;
    }

    bufferFix(bp, tabSetId, pageId, m, pLockHandle);
    bp.initPage(CegoBufferPage::TABLE);
    bp.setFixed(true);
}

// Releases one fix of the given buffer page.
//
// The slot index is derived from the page pointer of bp, the slot is locked, the fix
// counter is decremented and the page is marked dirty if requested by the caller.
//
// bp          : fixed buffer page, marked as not fixed on success
// isDirty     : if true, the dirty flag of the slot is set
// pLockHandle : lock handler used for the slot lock
//
// Throws an Exception if no pool exists or if the slot has no fixes left.
void CegoBufferPool::bufferUnfix(CegoBufferPage &bp, bool isDirty, CegoLockHandler *pLockHandle)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    const int segIdx = calcSegment(bp.getPageId());
    void *segBase = (void*)_pBufPool[segIdx];

    // the page data is located directly behind the padded slot header
    char* pagePtr = (char*)bp.getPagePtr();
    int hashId = ( (long long)pagePtr - (long long)segBase - BUFFERPOOLHEAD_SIZE ) / (  BUFFERHEAD_SIZE + _pageSize );
    BufferHead* pHead = (BufferHead*)( pagePtr - BUFFERHEAD_SIZE );

    pLockHandle->lockBufferPool(hashId, CegoLockHandler::WRITE);

    // unfixing a page without any fix is treated as an error
    if ( ! ( pHead->numFixes > 0 ) )
    {
	pLockHandle->unlockBufferPool(hashId);
	throw Exception(EXLOC, "Number of fixes is already zero");
    }

    if ( isDirty )
	pHead->isDirty = 1;

    pHead->numFixes--;

    bp.setFixed(false);

    pLockHandle->unlockBufferPool(hashId);
}

// Releases the buffer slot of the given page and registers the page for deletion.
//
// The slot header is reset to the NOT_OCCUPIED state and the page id is appended to the
// list of deleted pages, which is processed with the next checkpoint. If the list has
// grown beyond _maxPageDelete entries, a checkpoint is forced for the tableset the
// released page belonged to.
//
// bp          : buffer page to release, marked as not fixed afterwards
// pLockHandle : lock handler used for the slot lock and the forced checkpoint
//
// Throws an Exception if no pool exists or if the page id could not be registered.
void CegoBufferPool::bufferRelease(CegoBufferPage &bp, CegoLockHandler *pLockHandle)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    int segid = calcSegment(bp.getPageId());
    void *base = (void*)_pBufPool[segid];

    char* pagePtr = (char*)bp.getPagePtr();

    // The lock has to be taken for the slot which is actually modified. Because of the
    // probing done by bufferFix, this is not necessarily the slot calcHash(pageId), so the
    // slot index is derived from the page pointer in the same way as bufferUnfix does.
    int hashId = ( (long long)pagePtr - (long long)base - BUFFERPOOLHEAD_SIZE ) / (  BUFFERHEAD_SIZE + _pageSize );

    // the slot header is located directly in front of the page data
    BufferHead* pBH = (BufferHead*)( pagePtr - BUFFERHEAD_SIZE );

    pLockHandle->lockBufferPool(hashId, CegoLockHandler::WRITE);

    int tabSetId = 0;

    try
    {
	// save tabSetId value
	tabSetId = pBH->tabSetId;

	pBH->isOccupied = NOT_OCCUPIED;
	pBH->numFixes=0;
	pBH->isDirty = 0;
	pBH->tabSetId = 0;
	pBH->pageId = 0;
	pBH->fixStat = 0;
	pBH->numUsage = 0;

	// we just collect freed page ids
	// they are physically deleted at next checkpoint
	_deletedPageList.Insert(bp.getPageId());
    }
    catch ( Exception e )
    {
	pLockHandle->unlockBufferPool(hashId);
	throw Exception(EXLOC, "Cannot release file page", e);
    }

    // unlock outside of the try block, so the slot is never unlocked twice
    pLockHandle->unlockBufferPool(hashId);

    bp.setFixed(false);

    if ( _deletedPageList.Size() > _maxPageDelete )
    {
	Chain tableSet = getTabSetName(tabSetId);
	log(_modId, Logger::NOTICE, Chain("Forced checkpoint by bufferRelease for tableset ") + tableSet);
	writeCheckPoint(tableSet, true, false, pLockHandle);
    }
}

unsigned long long CegoBufferPool::writeCheckPoint(const Chain& tableSet, bool switchLog, bool archComplete, CegoLockHandler *pLockHandle, const Chain& escCmd, int escTimeout, int archTimeout)
{
    int tabSetId = getTabSetId(tableSet);    

    log(_modId, Logger::NOTICE, Chain("Writing checkpoint for tableset ") + tableSet + Chain(" as lsn = ") 
	+ Chain(getCurrentLSN(tabSetId)) + Chain(" ..."));    
    
    unsigned long long lsn = writeCheckPoint(tabSetId, switchLog, escCmd, escTimeout, pLockHandle);

    Datetime tsStart;
    
    long long archExceed = tsStart.asLong() + archTimeout;
    if ( archComplete )
    {
	while ( archiveComplete(tableSet) == false )
	{
	    log(_modId, Logger::NOTICE, Chain("Waiting to complete archiving in tableset ") + tableSet + Chain(" ...")); 

	    Datetime tsNow;
	    if ( tsNow.asLong() > archExceed )
	    {
		throw Exception(EXLOC, "Archiving timeout reached");		
	    }
	    Sleeper s;
	    s.secSleep(LOGMNG_LOGSWITCH_WAIT_DELAY);
	}
    }
    return lsn;
}

// Writes a checkpoint for the tableset given by id.
//
// With the complete buffer pool locked, the method
//   - executes the escape command, if one is given
//   - physically releases all pages collected in the deleted page list
//   - either ( checkpoint dump enabled ) dumps all dirty pages of the tableset to the
//     checkpoint dump and commits the dump to the data files, or ( otherwise ) writes all
//     dirty pages of the tableset directly to the data files, logging the old page content
//     first if the tableset is in backup state and the page needs a backup
//   - switches the log file ( switchLog == true ) or writes a sync log record
//   - calls optimizePool and increments the checkpoint counter
//
// tabSetId    : id of the tableset
// switchLog   : if true, the log file is switched, otherwise a sync record is logged
// escCmd      : escape command, executed if it is not the empty Chain
// timeout     : timeout passed to the escape command execution
// pLockHandle : lock handler used for the pool lock and for page I/O
//
// Returns the lsn which was set as committed lsn for the tableset.
// Throws an Exception if no pool exists; any Exception raised during the checkpoint
// is wrapped into a "Cannot write checkpoint" Exception after the pool has been unlocked.
unsigned long long CegoBufferPool::writeCheckPoint(int tabSetId, bool switchLog, const Chain& escCmd, int timeout, CegoLockHandler *pLockHandle)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    try
    {
	pLockHandle->lockBufferPool();

	unsigned long long lsn = 0;

	if ( escCmd != Chain() )
	{
	    log(_modId, Logger::NOTICE, Chain("Executing escape command <") + escCmd + Chain(">"));

	    char *pShell = getenv(CGEXESHELLVARNAME);

	    Chain shellCmd;
	    if ( pShell == NULL )
	    {
		shellCmd = Chain(CGSTDEXESHELL);
	    }
	    else
	    {
		shellCmd = Chain(pShell);
	    }

	    CommandExecuter cmdExe(shellCmd);

	    try
	    {
		int retCode = cmdExe.execute(escCmd, timeout);
		log(_modId, Logger::NOTICE, Chain("Escape command finished with return code : <") + Chain(retCode) + Chain(">"));
	    }
	    catch ( Exception e )
	    {
		Chain msg;
		e.pop(msg);
		log(_modId, Logger::LOGERR, msg);
		throw Exception(EXLOC, msg);
	    }
	}

	// physically release the pages which have been collected by bufferRelease
	PageIdType *pDelPage = _deletedPageList.First();
	while ( pDelPage )
	{
	    int fbmSize=0;
	    unsigned* fbm = 0;

	    releasePage(*pDelPage, pLockHandle, fbm, fbmSize);

	    if ( fbmSize > 0 )
	    {
		try
		{
		    int fileId = getFileIdForPageId(*pDelPage);
		    logBM(tabSetId, fileId, fbm, fbmSize, pLockHandle);
		}
		catch ( ... )
		{
		    // do not leak the bitmap if logging fails
		    delete[] fbm;
		    throw;
		}
		delete[] fbm;
	    }
	    pDelPage = _deletedPageList.Next();
	}

	// and reset the list
	_deletedPageList.Empty();

	// is checkpoint dump enabled for tableset ?

	if ( checkPointDumpEnabled(tabSetId) )
	{
	    // free all collected deleted page id's
	    // we don't treat this in dump, since a crash during checkpoint
	    // might just result in page leaks which can be corrected via cleanup startup option

	    dumpCheckpoint(tabSetId);

	    if ( switchLog )
	    {
		// set commited lsn to current lsn to sync with switchLogFile if log is active
		lsn = getCurrentLSN(tabSetId);
		setCommittedLSN(tabSetId, lsn);

		while ( switchLogFile(tabSetId) == false )
		{
		    log(_modId, Logger::NOTICE, Chain("Logfile for tabSetId ") + Chain(tabSetId) + Chain(" still active, switch failed"));
		    Sleeper s;
		    s.secSleep(LOGMNG_LOGSWITCH_WAIT_DELAY);
		}
	    }
	    else
	    {
		// we write a sync in any case
		CegoLogRecord lr;
		lr.setAction(CegoLogRecord::LOGREC_SYNC);
		logAction(tabSetId, lr);
	    }

	    lsn = getCurrentLSN(tabSetId);
	    setCommittedLSN(tabSetId, lsn);

	    // force sync
	    doc2Xml();

	    // now dump to data file
	    commitCheckpoint(tabSetId, pLockHandle);
	}
	else // ( checkPointDumpEnabled(tabSetId) == false )
	{
	    if ( switchLog )
	    {
		// set lsn before logfile switch, the method switchLogFile synchronizes with doc2Xml
		lsn = getCurrentLSN(tabSetId);
		setCommittedLSN(tabSetId, lsn);

		while ( switchLogFile(tabSetId) == false )
		{
		    log(_modId, Logger::NOTICE, Chain("Logfile for tabSetId ") + Chain(tabSetId) + Chain(" still active, switch failed"));
		    Sleeper s;
		    s.secSleep(LOGMNG_LOGSWITCH_WAIT_DELAY);
		}
	    }
	    else
	    {
		// we write a sync in any case
		CegoLogRecord lr;
		lr.setAction(CegoLogRecord::LOGREC_SYNC);
		logAction(tabSetId, lr);

		lsn = getCurrentLSN(tabSetId);
		setCommittedLSN(tabSetId, lsn);
	    }

	    Chain currentState = getTableSetRunState(tabSetId);
	    setTableSetRunState(tabSetId, XML_CHECKPOINT_VALUE);

	    // sync control information to xml database file
	    doc2Xml();

	    for ( unsigned long long i=0; i<_numSegment; i++)
	    {
		void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );

		unsigned long long pageCount = 0;

		while ( pageCount < _numPages )
		{
		    // copy just the header object : BUFFERHEAD_SIZE is the padded slot header size
		    // and may exceed sizeof(BufferHead), which would overrun the local variable
		    BufferHead bh;
		    memcpy(&bh, bufPtr, sizeof(BufferHead));

		    if ( bh.isOccupied != NOT_OCCUPIED && bh.isDirty != 0 && bh.tabSetId == tabSetId )
		    {
			if ( currentState == Chain(XML_BACKUP_VALUE) )
			{
			    if ( needPageBackup(bh.pageId) )
			    {
#ifdef DEBUG
				log(_modId, Logger::DEBUG, Chain("Reading page (") + Chain(bh.pageId) + Chain(") to log ..."));
#endif
				int ts;
				unsigned fixStat;
				char *pageData = new char[_pageSize];

				try
				{
				    // the page content currently stored on disk is saved to the log
				    readPage(bh.pageId, ts, fixStat, pageData, pLockHandle);

				    CegoLogRecord lr;
				    lr.setAction(CegoLogRecord::LOGREC_BUPAGE);
				    lr.setData(pageData);
				    lr.setDataLen(_pageSize);
				    lr.setPageId(bh.pageId);

				    if ( logAction(tabSetId, lr) == CegoLogManager::LOG_FULL )
				    {
#ifdef DEBUG
					log(_modId, Logger::DEBUG, Chain("Switching logFiles ..."));
#endif
					while ( switchLogFile(tabSetId) == false )
					{
					    log(_modId, Logger::NOTICE, Chain("Logfile for tabSetId ") + Chain(tabSetId) + Chain(" still active, switch failed"));

					    Sleeper s;
					    s.secSleep(LOGMNG_LOGSWITCH_WAIT_DELAY);
					}

					if ( logAction(tabSetId, lr) != CegoLogManager::LOG_SUCCESS )
					{
					    throw Exception(EXLOC, "Cannot write to log");
					}
				    }
				}
				catch ( ... )
				{
				    // release the page copy on every error path
				    delete[] pageData;
				    throw;
				}
				delete[] pageData;
			    }
			}
			_numDiskWrite++;
			_diskWriteTimer.start();

			writePage(bh.pageId, bh.fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE), pLockHandle);
			_diskWriteTimer.stop();

			_avgWriteDelay = ( _diskWriteTimer.getSum() /  (_numDiskWrite+(unsigned long long)1) ) / 1000;

			bh.isDirty = 0;
			memcpy(bufPtr, &bh, sizeof(BufferHead));
		    }
		    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
		    pageCount++;
		}
	    }
	    // set back tableset runstate to saved value
	    setTableSetRunState(tabSetId, currentState);
	    // sync control information to xml database file
	    doc2Xml();
	}

	optimizePool(tabSetId);

	_cpCount++;

	pLockHandle->unlockBufferPool();
	return lsn;
    }
    catch ( Exception e )
    {
	// if we got an exception, we have to unlock the still locked bufferpool area
	pLockHandle->unlockBufferPool();
	throw Exception(EXLOC, Chain("Cannot write checkpoint"), e);
    }
}

// Returns the checkpoint counter. The counter starts at zero in the constructor and is
// incremented by each successfully completed tabSetId based writeCheckPoint call.
unsigned long long CegoBufferPool::getCPCount() const
{
    return _cpCount;
}

// Writes all dirty pages of the given tableset to the checkpoint dump.
//
// All slots of all segments are scanned. Each occupied, dirty slot which belongs to the
// tableset is handed to CegoCheckpointDump::dumpPage and its dirty flag is cleared.
// No slot locks are taken here; the visible caller ( the tabSetId based writeCheckPoint )
// holds the lock for the complete pool.
//
// tabSetId : id of the tableset to dump
void CegoBufferPool::dumpCheckpoint(int tabSetId)
{
    Chain tableSet = getTabSetName(tabSetId);
    Chain tsRoot = getTSRoot(tableSet);

    CegoCheckpointDump cpd(tableSet, tsRoot, _pageSize);

    cpd.startWrite();

    for ( unsigned long long i=0; i<_numSegment; i++)
    {
	void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );

	unsigned long long pageCount = 0;

	while ( pageCount < _numPages )
	{
	    // copy just the header object : BUFFERHEAD_SIZE is the padded slot header size
	    // and may exceed sizeof(BufferHead), which would overrun the local variable
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));

	    if ( bh.isOccupied != NOT_OCCUPIED && bh.isDirty != 0 && bh.tabSetId == tabSetId )
	    {
		cpd.dumpPage(bh.pageId, bh.fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE));
		bh.isDirty = 0;
		memcpy(bufPtr, &bh, sizeof(BufferHead));
	    }
	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
	    pageCount++;
	}
    }
    cpd.finish();
}

// Commits the checkpoint dump of the given tableset and writes the dumped pages to the data files.
//
// After the dump has been committed, all pages are read back from the dump and written
// via writePage. If the tableset is in backup state and a page needs a backup, the page
// content currently stored on disk is written to the log first ( LOGREC_BUPAGE ).
// A full log is just reported once, the processing continues. Finally the dump is removed.
//
// tabSetId    : id of the tableset
// pLockHandle : lock handler used for page I/O
//
// Throws an Exception if a page backup fails; exceptions of the page I/O are passed through.
// The page buffer is released on all paths.
void CegoBufferPool::commitCheckpoint(int tabSetId, CegoLockHandler *pLockHandle)
{
    Chain currentState = getTableSetRunState(tabSetId);

    Chain tableSet = getTabSetName(tabSetId);
    Chain tsRoot = getTSRoot(tableSet);

    CegoCheckpointDump cpd(tableSet, tsRoot, _pageSize);

    // commit the dump
    cpd.commitDump();

    // for debugging, a crash can be simulated at this point
    // ( e.g. by writing to an invalid pointer )

    PageIdType pageId;
    unsigned fixStat;
    char *buf = new char[_pageSize];

    try
    {
	cpd.startRead();

	bool logExceedDetected=false;

	while ( cpd.readDump(pageId, fixStat, buf) )
	{
	    if ( currentState == Chain(XML_BACKUP_VALUE) )
	    {
		if ( needPageBackup(pageId) )
		{
#ifdef DEBUG
		    log(_modId, Logger::DEBUG, Chain("Reading page (") + Chain(pageId) + Chain(") to log ..."));
#endif
		    int ts;
		    unsigned oldFixStat;
		    char *oldPageData = new char[_pageSize];
		    try
		    {
			readPage(pageId, ts, oldFixStat, oldPageData, pLockHandle);

			CegoLogRecord lr;
			lr.setAction(CegoLogRecord::LOGREC_BUPAGE);
			lr.setData(oldPageData);
			lr.setDataLen(_pageSize);
			lr.setPageId(pageId);

			if ( logAction(tabSetId, lr) == CegoLogManager::LOG_FULL && logExceedDetected == false )
			{
			    logExceedDetected = true;
			    log(_modId, Logger::NOTICE, Chain("Logfile exceeded limit during checkpoint dump at offset ") + Chain(getLogOffset(tabSetId)));
			}
		    }
		    catch ( Exception e )
		    {
			delete[] oldPageData;
			throw Exception(EXLOC, Chain("Cannot perform page backup"), e);
		    }
		    delete[] oldPageData;
		}
	    }
	    _numDiskWrite++;
	    _diskWriteTimer.start();

	    writePage(pageId, fixStat, buf, pLockHandle);
	    _diskWriteTimer.stop();

	    _avgWriteDelay = ( _diskWriteTimer.getSum() /  (_numDiskWrite+(unsigned long long)1) ) / 1000;
	}
    }
    catch ( ... )
    {
	// do not leak the page buffer, the exception itself is passed on unchanged
	delete[] buf;
	throw;
    }
    delete[] buf;
    cpd.remove();
}

// Applies an existing, ready checkpoint dump of the given tableset to the data files.
//
// If CegoCheckpointDump::readyDumpExists() reports a dump, all pages are read from the
// dump and written via writePage, then the dump is removed. cleanUp() is called on the
// dump object in any case.
//
// tabSetId    : id of the tableset
// pLockHandle : lock handler used for page I/O
//
// Exceptions of the called methods are passed through; the page buffer is released on all paths.
void CegoBufferPool::restoreCheckpointDump(int tabSetId, CegoLockHandler *pLockHandle)
{
    Chain tableSet = getTabSetName(tabSetId);
    Chain tsRoot = getTSRoot(tableSet);

    CegoCheckpointDump cpd(tableSet, tsRoot, _pageSize);

    if ( cpd.readyDumpExists() )
    {
	PageIdType pageId;
	unsigned fixStat;
	char *buf = new char[_pageSize];

	try
	{
	    cpd.startRead();

	    while ( cpd.readDump(pageId, fixStat, buf) )
	    {
		_numDiskWrite++;
		_diskWriteTimer.start();

		writePage(pageId, fixStat, buf, pLockHandle);
		_diskWriteTimer.stop();

		_avgWriteDelay = ( _diskWriteTimer.getSum() /  (_numDiskWrite+(unsigned long long)1) ) / 1000;
	    }
	}
	catch ( ... )
	{
	    // do not leak the page buffer, the exception itself is passed on unchanged
	    delete[] buf;
	    throw;
	}
	delete[] buf;
	cpd.remove();
    }
    cpd.cleanUp();
}
    
// Writes all dirty pages of the given tableset to disk and removes the tableset from the pool.
//
// All slots of all segments are scanned. For each slot which belongs to the tableset, an
// occupied and dirty page is written via writePage, then the slot is marked as
// NOT_OCCUPIED with no fixes and a cleared dirty flag. Finally, releaseFiles is called
// for the tableset.
//
// tabSetId    : id of the tableset
// pLockHandle : lock handler used for page I/O
//
// Throws an Exception if no pool exists; exceptions of the page I/O are passed through.
void CegoBufferPool::writeAndRemoveTabSet(int tabSetId, CegoLockHandler *pLockHandle)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool");
    }

    for ( unsigned long long i=0; i<_numSegment; i++)
    {
	void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );

	unsigned long long pageCount = 0;

	while ( pageCount < _numPages )
	{
	    // copy just the header object : BUFFERHEAD_SIZE is the padded slot header size
	    // and may exceed sizeof(BufferHead), which would overrun the local variable
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));

	    if ( bh.tabSetId == tabSetId )
	    {
		if ( bh.isOccupied != NOT_OCCUPIED && bh.isDirty != 0 )
		{
		    _numDiskWrite++;

		    _diskWriteTimer.start();
		    writePage(bh.pageId, bh.fixStat, (char*)((long long)bufPtr + BUFFERHEAD_SIZE), pLockHandle);
		    _diskWriteTimer.stop();

		    _avgWriteDelay = ( _diskWriteTimer.getSum() /  (_numDiskWrite+(unsigned long long)1) ) / 1000;
		}
		bh.isOccupied = NOT_OCCUPIED;
		bh.numFixes=0;
		// the page has been written ( or was clean ), a free slot must not be
		// counted as dirty by the pool statistics
		bh.isDirty = 0;
		memcpy(bufPtr, &bh, sizeof(BufferHead));
	    }
	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
	    pageCount++;
	}
    }
    releaseFiles(tabSetId);
}

unsigned long long CegoBufferPool::uptime() const
{
    Datetime ts;
    return ts.asLong() - _poolStart;
}

// Collects the buffer pool statistics.
//
// All slots of all segments are scanned twice : the first pass counts the slots by
// occupation state and sums up fixes, dirty flags and usage counters, the second pass
// calculates the average absolute deviation of the usage counters from their mean value.
// Slots of type PERSISTENT_OCCUPIED are excluded from the usage sum and the deviation.
//
// pageSize      : page size of the pool
// numTotal      : total number of slots ( pages per segment * segments )
// numUsed       : number of occupied slots
// numFree       : number of not occupied slots
// numDirty      : sum of the dirty flags of all slots
// numFixes      : sum of the fix counters of all slots
// numPersistent : number of slots of type PERSISTENT_OCCUPIED
// numNoSync     : number of slots of type WRITE_ON_DIRTY
// numDiskRead   : disk read counter of the current statistics period
// numDiskWrite  : disk write counter of the current statistics period
// hitRate       : 100 * ( fixCount - numDiskRead + 1 ) / ( fixCount + 1 )
// spreadRate    : average usage deviation divided by ( mean usage + 1 )
// readDelay     : stored average read delay
// writeDelay    : stored average write delay
// curFixCount   : fix counter of the current statistics period
// maxFixCount   : length of a statistics period ( BUPMNG_STATSPERIOD )
// avgFixTry     : fix tries per fix, zero if nothing was fixed yet
// statStart     : start timestamp of the current statistics period
// uptime        : difference between the current timestamp and the pool start timestamp
//
// Throws an Exception if no pool exists.
void CegoBufferPool::poolInfo(int& pageSize, 
			      unsigned long long& numTotal, 
			      unsigned long long& numUsed, 
			      unsigned long long& numFree, 
			      unsigned long long& numDirty, 
			      unsigned long long& numFixes, 
			      unsigned long long& numPersistent, 
			      unsigned long long& numNoSync, 
			      unsigned long long& numDiskRead, 
			      unsigned long long& numDiskWrite,
			      double& hitRate,
			      double& spreadRate,
			      unsigned long long& readDelay,
			      unsigned long long& writeDelay,
			      unsigned long long& curFixCount,
			      unsigned long long& maxFixCount,
			      unsigned long long& avgFixTry,
			      unsigned long long& statStart,
                              unsigned long long& uptime) const
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    pageSize = _pageSize;
    numTotal = _numPages * _numSegment;
    numUsed=0;
    numFree=0;
    numFixes=0;
    numDirty=0;
    numPersistent=0;
    numNoSync=0;

    unsigned long long numUsage = 0;

    // first pass : counters
    for ( unsigned long long i=0; i<_numSegment; i++)
    {
	void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );

	unsigned long long pageCount = 0;

	while ( pageCount < _numPages )
	{
	    // copy just the header object : BUFFERHEAD_SIZE is the padded slot header size
	    // and may exceed sizeof(BufferHead), which would overrun the local variable
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));

	    numFixes += (unsigned long long)bh.numFixes;
	    numDirty += (unsigned long long)bh.isDirty;

	    // we don't count persistent hot spot pages
	    if ( bh.isOccupied != PERSISTENT_OCCUPIED )
		numUsage += bh.numUsage;

	    if (bh.isOccupied == NOT_OCCUPIED)
	    {
		numFree++;
	    }
	    else if (bh.isOccupied == WRITE_ON_SYNC)
	    {
		numUsed++;
	    }
	    else if (bh.isOccupied == WRITE_ON_DIRTY)
	    {
		numUsed++;
		numNoSync++;
	    }
	    else if (bh.isOccupied == PERSISTENT_OCCUPIED)
	    {
		numUsed++;
		numPersistent++;
	    }

	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
	    pageCount++;
	}
    }

    numDiskWrite = _numDiskWrite;
    numDiskRead = _numDiskRead;

    hitRate = 100.0 * ( (double)_fixCount - _numDiskRead + 1 ) / ( (double)_fixCount + 1 );

    // mean usage over all slots of the pool
    double usageMean = (double)numUsage / (double)( _numSegment * _numPages );
    double usageDist = 0.0;

    // second pass : sum of the absolute deviations from the mean usage
    for ( unsigned long long i=0; i<_numSegment; i++)
    {
	void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );

	unsigned long long pageCount = 0;

	while ( pageCount < _numPages )
	{
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));

	    if ( bh.isOccupied != PERSISTENT_OCCUPIED )
	    {
		double usage = (double)bh.numUsage;
		if ( usageMean > usage )
		    usageDist += usageMean - usage;
		else
		    usageDist += usage - usageMean;
	    }

	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
	    pageCount++;
	}
    }

    double avgDist = usageDist / (double)( _numSegment * _numPages );
    spreadRate =  avgDist / ( usageMean + 1 );

    readDelay = _avgReadDelay;
    writeDelay = _avgWriteDelay;
    statStart = _statStart;
    curFixCount = _fixCount;
    avgFixTry = _fixCount > 0 ? _fixTry / _fixCount : 0;
    maxFixCount = BUPMNG_STATSPERIOD;

    Datetime ts;
    uptime = ts.asLong() - _poolStart;
}
    
// Fills the given list with one CegoBufferPoolEntry for each slot of the pool.
//
// The list is emptied first. Each entry carries the segment index, the slot index within
// the segment, the occupation state as text, the dirty flag and the counters of the slot
// header. The occupation text stays empty for a state value which is not known here.
//
// entryList : list which receives the entries
//
// Throws an Exception if no pool exists.
void CegoBufferPool::getPoolEntryList(ListT<CegoBufferPoolEntry>& entryList)
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }

    entryList.Empty();

    for ( unsigned long long seg=0; seg<_numSegment; seg++)
    {
	void *bufPtr = (void*)( (long long)_pBufPool[seg] + BUFFERPOOLHEAD_SIZE );

	unsigned long long pageCount = 0;

	while ( pageCount < _numPages )
	{
	    // copy just the header object : BUFFERHEAD_SIZE is the padded slot header size
	    // and may exceed sizeof(BufferHead), which would overrun the local variable
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));

	    bool isDirty = false;
	    if ( bh.isDirty )
		isDirty=true;

	    Chain occState;
	    if (bh.isOccupied == NOT_OCCUPIED)
		occState=Chain("NOTOCCUPIED");
	    else if (bh.isOccupied == WRITE_ON_SYNC)
		occState=Chain("WRITEONSYNC");
	    else if (bh.isOccupied == WRITE_ON_DIRTY)
		occState=Chain("WRITEONDIRTY");
	    else if (bh.isOccupied == PERSISTENT_OCCUPIED)
		occState=Chain("PERSISTENT");

	    CegoBufferPoolEntry bpe(seg, pageCount, occState, isDirty, bh.numFixes, bh.tabSetId, bh.pageId, bh.fixStat, bh.numUsage);
	    entryList.Insert(bpe);

	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));
	    pageCount++;
	}
    }
}

void CegoBufferPool::resetStats()
{
    _numDiskRead=0;
    _numDiskWrite=0;
    _fixCount=0;
    _fixTry=0;
    _avgReadDelay=0;
    _avgWriteDelay=0;
    _diskReadTimer.reset();
    _diskWriteTimer.reset();
    Datetime ts;
    _statStart = ts.asLong(); 
}

// Writes the log record lr for the given tableset.
//
// Nothing is done if isActive(tabSetId) is false. Otherwise the log manager
// lock of the tableset is taken in write mode for the duration of the call
// and released again on every exit path, including the exception path.
//
// If logAction reports LOG_FULL, a checkpoint is forced via writeCheckPoint.
// If logAction reports LOG_ERROR, the tableset sync state is set to
// XML_LOG_LOSS_VALUE and an Exception is thrown.
//
// Throws Exception ( "Cannot write to log" ), wrapping any Exception raised
// while the lock is held.
void CegoBufferPool::logIt(int tabSetId, CegoLogRecord& lr, CegoLockHandler* pLockHandle, bool flushLog)
{
    if ( isActive(tabSetId) == false )
    {
	return;
    }

    _lmLock[tabSetId].writeLock(LM_LOCKTIMEOUT);
	
    try
    {
	CegoLogManager::LogResult res = logAction(tabSetId, lr, flushLog);
	    
	if ( res == CegoLogManager::LOG_FULL )
	{
	    // if log is full, we have to force a checkpoint
	    // to be consistent, log entry has already been written to log
		
	    Chain tableSet = getTabSetName(tabSetId);
	    writeCheckPoint(tableSet, true, false, pLockHandle);
	}
	else if ( res == CegoLogManager::LOG_ERROR )
	{
	    Chain tableSet = getTabSetName(tabSetId);
	    setTableSetSyncState(tableSet, XML_LOG_LOSS_VALUE);
		
	    throw Exception(EXLOC, Chain("Cannot write to log"));
	}
    }
    catch ( Exception& e )
    {
	// caught by reference, so the exception object is not copied
	_lmLock[tabSetId].unlock();
	throw Exception(EXLOC, Chain("Cannot write to log"), e);
    }
	
    _lmLock[tabSetId].unlock();
}

// Prints a summary of the buffer pool to cout.
//
// The output consists of the base pointer, the page size and the number of
// pages per segment, followed by the number of used slots, the sum of all
// fixes, the number of dirty slots and the number of free slots, each
// accumulated over all segments.
//
// Throws Exception if _pBufPool is 0.
void CegoBufferPool::printPool()
{
    if (_pBufPool == 0)
    {
	throw Exception(EXLOC, "No valid bufferpool" );
    }
    
    cout << "--- BufferPool ---" << endl;
    cout << "BasePtr: " << (long long)_pBufPool << endl;
    cout << "PageSize: " << _pageSize << endl;
    cout << "NumPages: " << _numPages << endl;
    
    unsigned long long numUsed=0;
    unsigned long long numFree=0;
    unsigned long long numFixes=0;
    unsigned long long numDirty=0;

    for ( unsigned long long i=0; i<_numSegment; i++)
    {
	// the first slot starts right behind the segment's pool head
	void *bufPtr = (void*)( (long long)_pBufPool[i] + BUFFERPOOLHEAD_SIZE );
	
	for ( unsigned long long pageCount = 0; pageCount < _numPages; pageCount++ )
	{    
	    // BUFFERHEAD_SIZE is sizeof(BufferHead) rounded up to BUPMNG_ALIGNMENT
	    // and may therefore be larger than the struct itself. Copy just the
	    // struct, so the local variable is never overrun.
	    BufferHead bh;
	    memcpy(&bh, bufPtr, sizeof(BufferHead));
	    
	    numFixes += bh.numFixes;
	    numDirty += (unsigned long long)bh.isDirty;
	    
	    if (bh.isOccupied == 0)
		numFree++;
	    else
		numUsed++;
	    
	    // step over the aligned head and the page data to the next slot
	    bufPtr = (void*)((long long)bufPtr + (long long)(BUFFERHEAD_SIZE + _pageSize));     
	}
    }
	
    // each counter is reported exactly once
    cout << "NumUsed: " << numUsed << endl;
    cout << "NumFixes: " << numFixes << endl;
    cout << "NumDirty: " << numDirty << endl;
    cout << "NumFree: " << numFree << endl;
}

void CegoBufferPool::getLMLockStat(int tabSetId, Chain& lockName, int& lockCount, unsigned long long &numRdLock, unsigned long long &numWrLock, unsigned long long &sumRdDelay, unsigned long long &sumWrDelay)
{
    lockName = getTabSetName(tabSetId);
    lockCount = _lmLock[tabSetId].numLockTry();

    numRdLock = _lmLock[tabSetId].numReadLock();
    numWrLock = _lmLock[tabSetId].numWriteLock();
    sumRdDelay = 0;
    sumWrDelay = 0;

    if ( _lmLock[tabSetId].numReadLock() > 0 )
	sumRdDelay = _lmLock[tabSetId].sumReadDelay() / LCKMNG_DELRES;
    if ( _lmLock[tabSetId].numWriteLock() > 0 )
	sumWrDelay = _lmLock[tabSetId].sumWriteDelay() / LCKMNG_DELRES;
}

// Returns the lock statistics aggregated over all TABMNG_MAXTABSET log
// manager locks.
//
// lockGroup receives the group name "LMLCK", numLock the number of locks
// taken into account. The remaining values are the sums of the corresponding
// values of each single lock, where delays are divided by LCKMNG_DELRES and
// are added only for locks with at least one read or write lock counted.
void CegoBufferPool::getAggLMLockStat(Chain& lockGroup, int& numLock, int& lockCount, unsigned long long &numRdLock, unsigned long long &numWrLock, unsigned long long &sumRdDelay, unsigned long long &sumWrDelay)
{
    lockGroup = Chain("LMLCK");

    // start all accumulators from zero
    numLock = 0;
    lockCount = 0;
    numRdLock = 0;
    numWrLock = 0;
    sumRdDelay = 0;
    sumWrDelay = 0;

    for ( int idx = 0; idx < TABMNG_MAXTABSET; idx++ )
    {
	ThreadLock& lmLock = _lmLock[idx];

	lockCount += lmLock.numLockTry();
	numRdLock += lmLock.numReadLock();
	numWrLock += lmLock.numWriteLock();

	if ( lmLock.numReadLock() > 0 )
	{
	    sumRdDelay += lmLock.sumReadDelay() / LCKMNG_DELRES;
	}
	if ( lmLock.numWriteLock() > 0 )
	{
	    sumWrDelay += lmLock.sumWriteDelay() / LCKMNG_DELRES;
	}

	numLock++;
    }
}

/////////////////////
// private methods //
/////////////////////

// Calculates the slot index for a page id.
//
// The value pageId + 1 is reduced by the product of the segment number
// ( see calcSegment ) and _numPages, if it is greater than that product.
// The result is taken modulo _numPages.
// All arithmetic is done with unsigned long long values, since the
// intermediate values might exceed the integer range.
int CegoBufferPool::calcHash(PageIdType pageId)
{
    const unsigned long long pagesPerSegment = (unsigned long long)_numPages;
    const unsigned long long segmentBase = (unsigned long long)calcSegment(pageId) * pagesPerSegment;

    unsigned long long slotKey = (unsigned long long)(pageId+1);

    if ( slotKey > segmentBase )
    {
	slotKey -= segmentBase;
    }

    return (int)( slotKey % pagesPerSegment );
}

// Calculates the segment number for a page id, which is the page id divided
// by _numPages, taken modulo _numSegment.
int CegoBufferPool::calcSegment(PageIdType pageId)
{
    const unsigned long long pageBlock = pageId / (unsigned long long)_numPages;
    return (int)( pageBlock % (unsigned long long)_numSegment );
}

void CegoBufferPool::logBM(int tabSetId, int fileId, unsigned* fbm, int fbmSize, CegoLockHandler* pLockHandle)
{    
    CegoLogRecord lr;
    
    lr.setAction(CegoLogRecord::LOGREC_BUFBM);
    lr.setData((char*)fbm);
    lr.setDataLen(fbmSize * sizeof(unsigned));
    lr.setFileId(fileId);
    
    logIt(tabSetId, lr, pLockHandle);
}

bool CegoBufferPool::archiveComplete(const Chain& tableSet)
{
    ListT<Chain> lfList;
    ListT<int> sizeList;
    ListT<Chain> statusList;

    getLogFileInfo(tableSet, lfList, sizeList, statusList);
    
    Chain *pStatus = statusList.First();
    
    while ( pStatus ) 
    {
	if ( *pStatus == Chain(XML_OCCUPIED_VALUE))
	{
	    return false;
	}
	else
	{
	    pStatus = statusList.Next();
	}	
    }
    return true;
}

void CegoBufferPool::optimizePool(int tabSetId)
{
    Chain tableSet = getTabSetName(tabSetId);
    log(_modId, Logger::NOTICE, Chain("Optimizing bufferpool for tableset ") + tableSet + Chain(" ..."));
    
    unsigned long long numRelocated = 0;
	
    for ( unsigned long seg=0; seg<_numSegment; seg++)
    {
	void *basePtr = (void*)( (long long)_pBufPool[seg] + BUFFERPOOLHEAD_SIZE );
	    
	unsigned long long pageIndex = 0;
    	
	while ( pageIndex < _numPages )
	{	    
	    BufferHead bh;
	    memcpy(&bh, basePtr, BUFFERHEAD_SIZE);

	    if ( ( bh.isOccupied == WRITE_ON_SYNC || bh.isOccupied == WRITE_ON_DIRTY ) && bh.tabSetId == tabSetId && bh.numFixes == 0 )
	    {	    
		unsigned long long hashId = calcHash(bh.pageId);

		if ( pageIndex != hashId )
		{		    
		    void* checkPtr = (void*)((long long)_pBufPool[seg] + BUFFERPOOLHEAD_SIZE + (long long )hashId * (long long)( BUFFERHEAD_SIZE + _pageSize ));
		    
		    BufferHead cbh;
		    memcpy(&cbh, checkPtr, BUFFERHEAD_SIZE);
   
		    if ( cbh.isOccupied == NOT_OCCUPIED 
			 || ( cbh.numFixes == 0 && ( cbh.fixStat + BUPMNG_RELOCATION_THRESHOLD ) < bh.fixStat && cbh.isDirty == 0 ) )
		    {

			/*
			cout << "Drop Page:" << endl;
			cout << "PageId = " << cbh.pageId << endl;
			cout << "FixStat = " << cbh.fixStat << endl;
			cout << "Relocate Page:" << endl;
			cout << "PageId = " << bh.pageId << endl;
			cout << "FixStat = " << bh.fixStat << endl;
			*/

			numRelocated++;
			
			// relocate page
			memcpy(checkPtr, basePtr, BUFFERHEAD_SIZE + _pageSize);

			// clean slot
			BufferHead ibh;
			ibh.isOccupied = NOT_OCCUPIED;
			ibh.isDirty = 0;
			ibh.numFixes = 0;
			ibh.tabSetId = 0;
			ibh.pageId = 0;
			ibh.fixStat = 0;
			ibh.numUsage = 0;
			memcpy(basePtr, &ibh, BUFFERHEAD_SIZE );
		    }		    		    
		}
	    }
	    
	    pageIndex++;

	    basePtr = (void*)((long long)basePtr + (long long)( BUFFERHEAD_SIZE + _pageSize ));
	}	
    }
    log(_modId, Logger::NOTICE, Chain(numRelocated) + Chain(" pages relocated "));
}

void CegoBufferPool::lockPool(int tabSetId, bool doWrite)
{
    if ( doWrite )
    {
	_lmLock[tabSetId].writeLock(LM_LOCKTIMEOUT);
    }
    else
    {
	_lmLock[tabSetId].readLock(LM_LOCKTIMEOUT);
    }
}

void CegoBufferPool::unlockPool(int tabSetId)
{
    _lmLock[tabSetId].unlock();   
}
