///////////////////////////////////////////////////////////////////////////////
//                                                         
// TextIndex.cc
// ------------
// TextIndex implementation module
//                                               
// Design and Implementation by Bjoern Lemke               
//                                                         
// (C)opyright 2000-2016 Bjoern Lemke                        
//                                                         
// IMPLEMENTATION MODULE
//
// Class: TextIndex
// 
// Description: Full text index utility class
//
// Status: CLEAN
// 
///////////////////////////////////////////////////////////////////////////////

// BASE INCLUDES
#include "Chain.h"
#include "TextIndex.h"
#include "Exception.h"
#include "Tokenizer.h"

// Creates a text index over the given text.
//
// text   - text that is split into tokens by tokenizeText()
// filter - token filters consulted for every token in tokenizeText()
// depth  - number of tokens that nextKey() combines into a single key
// sep    - separator handed to the Tokenizer
// esc    - escape character handed to the Tokenizer
//
// Tokenizing is deferred: _tokenArray stays 0 until nextKey() is called
// for the first time.
TextIndex::TextIndex(const Chain& text, const SetT<IndexFilter>& filter, unsigned int depth, const Chain& sep, char esc)
{
    _text = text;
    // Keep the filters; tokenizeText() iterates _filter, so without this
    // assignment the caller supplied filters would never be applied.
    _filter = filter;
    _depth = depth;
    _sep = sep;
    _esc = esc;
    _tokenArray = 0;
}

// Releases the token pointer array built by tokenizeText().
//
// The array only holds pointers obtained from _tokenSet, so just the array
// itself is released here, not the Chain objects it points to.
TextIndex::~TextIndex()
{
    // The array is allocated with new[], so it must be released with
    // delete[]. Deleting a null pointer is a no-op, no check is needed.
    delete[] _tokenArray;
}

// Delivers the next index key.
//
// A key is built from _depth tokens selected by the positions in _depPos,
// joined with ",". The first call tokenizes the text and delivers the key
// for the initial positions 0 .. _depth-1; every further call advances the
// positions via nextPos().
//
// key - receives the key; left untouched if the method returns false
//
// Returns true if a key was stored in key, false if no (further) key exists.
bool TextIndex::nextKey(Chain& key)
{
    // Without any slot there is nothing to combine ( and _depth-1 would wrap )
    if ( _depth == 0 )
        return false;

    const bool firstCall = ( _tokenArray == 0 );

    if ( firstCall )
        tokenizeText();

    // Fewer tokens than slots: no key can be built, and reading
    // _tokenArray at the slot positions would run out of bounds
    if ( _tokenSet.Size() < _depth )
        return false;

    if ( firstCall )
    {
        // Initial combination, which is delivered as the first key
        for ( unsigned i=0; i<_depth; i++ )
        {
            _depPos[i]=i;
        }
    }
    else if ( nextPos(_depth-1) == false )
    {
        // All combinations have been delivered
        return false;
    }

    Chain concat = *_tokenArray[_depPos[0]];
    for ( unsigned i=1; i<_depth; i++ )
    {
        concat += Chain(",") + *_tokenArray[_depPos[i]];
    }

    key = concat;

    return true;
}

// Advances the positions in _depPos to the next combination.
//
// Slot d is incremented if it has not reached its upper bound yet, and all
// slots from d up to _depth-1 are set to consecutive values starting at the
// new position. If slot d is exhausted, the next lower slot is tried.
//
// d - slot to advance, callers start with _depth-1
//
// Returns true if the positions were advanced, false if slot 0 is
// exhausted as well.
bool TextIndex::nextPos(unsigned int d)
{
    const unsigned long tokenCount = _tokenSet.Size();
    // Number of slots from d up to the last one
    const unsigned long tailSlots = _depth - d;

    // Check tokenCount >= tailSlots first: with fewer tokens than slots the
    // subtraction would wrap around and the bound check would always pass
    if ( tokenCount >= tailSlots && _depPos[d] < tokenCount - tailSlots )
    {
        _depPos[d]++;
        unsigned p=_depPos[d];
        for ( unsigned i=d; i<_depth; i++ )
        {
            _depPos[i]=p;
            p++;
        }
        return true;
    }
    else if ( d > 0 )
    {
        return nextPos(d-1);
    }
    else
    {
        return false;
    }
}

void TextIndex::tokenizeText()
{
    Tokenizer tok(_text, _sep, _esc);
    Chain token;
    while ( tok.nextToken(token) )
    {
	token=token.toLower();
	
	IndexFilter *pFilter = _filter.First();
	bool match=false;
	while ( pFilter && match==false)
	{
	    match = pFilter->match(token);

	    if ( match )
	    {
		token = pFilter->getToken();
	    }
	    else
	    {
		pFilter = _filter.Next();
	    }
	}
	if ( _tokenSet.Find(token) == 0 )
	    _tokenSet.Insert(token);
	
    }


    _tokenArray = new Chain*[_tokenSet.Size()];
    Chain *pToken = _tokenSet.First();
    unsigned i=0;
    while ( pToken )
    {
	// cout << "Got token " << *pToken << endl;
	_tokenArray[i] = pToken;
	pToken = _tokenSet.Next();
	i++;
    }
    // cout << i << " Tokens detected" << endl;
}
