///////////////////////////////////////////////////////////////////////////////
//                                                         
// Replacer.cc
// -----------
// String replacement via regular expression handling
//                                               
// Design and Implementation by Bjoern Lemke               
//                                                         
// (C)opyright 2000-2025 Bjoern Lemke
//
// IMPLEMENTATION MODULE
//
// Class: Replacer
// 
// Description: String replacement utility class
//
// Status: CLEAN
//
///////////////////////////////////////////////////////////////////////////////

// POSIX INCLUDES
#include <stdlib.h>
#include <string.h>

// LFC INCLUDES
#include "Exception.h"
#include "Chain.h"
#include "Replacer.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <regex.h>

#define REPSIGN '$'

// Creates a replacer for the given regular expression and replacement
// pattern. Nothing is compiled at this point: the regex handle and the
// match buffer start out empty, so replace() throws until prepare()
// has been called.
Replacer::Replacer(const Chain& expr, const Chain& replace)
{
    // no compiled expression and no match buffer yet
    _pRE = NULL;
    _m = NULL;

    // remember the patterns for a later prepare()
    _expr = expr;
    _replace = replace;
}

// Releases the compiled regular expression (if any) and the match buffer.
Replacer::~Replacer()
{
    if ( _pRE != NULL )
    {
        // regfree releases the internals of the compiled expression,
        // free releases the regex_t structure itself
        regfree(_pRE);
        free(_pRE);
    }

    // free() ignores a null pointer, so no separate check is required
    free(_m);
}

// Assigns a new expression / replacement pair and discards any compiled
// state. After this call the replacer is unprepared again, so prepare()
// has to be invoked before the next replace().
void Replacer::set(const Chain& expr, const Chain& replace)
{
    _expr = expr;
    _replace = replace;

    // throw away the compiled expression belonging to the old pattern
    if ( _pRE != NULL )
    {
        regfree(_pRE);
        free(_pRE);
        _pRE = NULL;
    }

    // free() ignores a null pointer; the buffer is rebuilt by prepare()
    free(_m);
    _m = NULL;
}

// Returns the regular expression string currently stored in this replacer.
const Chain& Replacer::getExpr() const
{
    return this->_expr;
}

// Returns the replacement pattern currently stored in this replacer.
const Chain& Replacer::getReplace() const
{
    return this->_replace;
}

void Replacer::prepare()
{

    if (  _pRE )
    {
	regfree((regex_t*)_pRE);
	free(_pRE);
	_pRE = 0;
    }
    
    if ( _m )
    {
	free(_m);
	_m = 0;
    }

    if ( (char*)_expr != 0 )
    {
	_pRE = (regex_t*)malloc(sizeof(regex_t));

	int err;
	if ( ( err = regcomp((regex_t*)_pRE, (char*)_expr, REG_EXTENDED) ) == 0 )
	{	 
	    _nmatch = _pRE->re_nsub;
	    
	    _m = (regmatch_t*)malloc((_nmatch + 1)*sizeof(regmatch_t));
			    
	    char *p;
	    
            // count back references in replace
	    _br = 0;
	    p = (char*)_replace;
	    while( *p )
	    {		
		if (*p == REPSIGN )
		{

		    p++;		    
		    int c = (int)*p - 48;

		    if ( c > 0 && c < 10 )
		    {			
			_br++;			
			if ( c > (int)_nmatch )
			{
			    Chain msg = Chain("Invalid replacement reference");
			    throw Exception(EXLOC, msg); 
			}
		    }		    
		}
		else
		    p++;		
	    }
	}
	else
	{
	    Chain msg = Chain("Regcomp error " + Chain(err));
	    throw Exception(EXLOC, msg); 
	}	
    }
    else
    {
	Chain msg = Chain("Invalid regular expression <") + _expr + Chain(">"); 
	throw Exception(EXLOC, msg); 
    }
}

int Replacer::replace(const Chain& src, Chain& target)
{
    if ( _pRE )
    {
	int replacements = 0;

	int start=1;

	// cout << "Starting regex for " << src << " with nmatch = " << _nmatch << endl;

	int  err = 0;
	
	while( *((char*)src + start - 1) && err == 0)
	{		    
	     err = regexec(_pRE, (char*)src + start - 1, _nmatch + 1, _m, REG_NOTBOL);
	    if ( err > 1)
	    {
		throw Exception(EXLOC, Chain("regex error = ") + Chain(err)); 
	    }
	    else if ( err == 0 )
	    {

		/*
		cout << "Starting regex for " << (char*)src + start - 1 << " Start = " << start << endl;
		cout << "Setting target from " << start << " to " << _m[0].rm_so << endl;
		cout << "End offset = " << _m[0].rm_eo << endl;
		*/

		// if available, add prefix 
		if ( _m[0].rm_so > 0 )
		    target += src.subChain(start, start + _m[0].rm_so - 1);
		
		// cout << "Target 0 = " << target << endl;
		
		char* pStart = (char*)_replace;
		char* p = pStart;
		
		int rplstart = 0;
		int rplend = p - pStart + 1;
		
		while ( *p )
		{		
		    if ( *p == REPSIGN )
		    {		    		    
			p++;
			int c = *p - 48 ;
			
			rplend = p - pStart - 1;
			
			if ( c > 0 && c < 10 )
			{			
			    // cout << "Target 1 = " << target << " from " << rplstart << " to " << rplend <<  endl;

			    // if available, add replace pattern prefix
			    if ( rplstart > 0 && rplend >= rplstart )
				target += _replace.subChain(rplstart, rplend);
			    
			    // cout << "Target 1a = " << target << " from  = " << _m[c].rm_so << " to " << _m[c].rm_eo  <<  endl;

			    // if available, add replacement string
			    if ( _m[c].rm_eo > _m[c].rm_so )
				target += src.subChain(start+_m[c].rm_so, start+_m[c].rm_eo-1);
			    
			    // cout << "Target 2 = " << target << endl;
			    
			    p++; 
			    
			    rplstart=rplend+3;
			    rplend=rplstart;
			}
			else
			{					     
			    p++;
			    
			    // cout << "Target X = " << target << " " << rplstart << " " << rplend <<  endl;
			    if ( rplstart > 0 && rplend >= rplstart )
				target += _replace.subChain(rplstart, rplend);
			    
			    // we take the $ sign with next chunk
			    rplstart=rplend+2;
			    rplend=rplstart;
			}		    
		    }
		    else 
		    {		      
			if ( rplstart == 0 )
			{
			    rplstart = p - pStart + 1;
			    rplend = p - pStart  + 1 ;
			}
			
			p++;		    
		    }
		}
			

		// cout << "Target 3 = " << target << endl;
		
		// cout << "rplend = " << rplend << " replace len = " << _replace.length() << endl;

		// if available, add replacement postfix
		if ( rplend < (int)_replace.length() )
		{
		    target += _replace.subChain(rplend, _replace.length());
		}

		// cout << "Target 4 = " << target << endl;
	    

		start +=  _m[0].rm_eo;
		
		replacements++;

	    }

	}

	// cout << "Adding tail from " << start << " to " << src.length() << endl;

	// if available, add source suffix
	if ( start > 0 && (int)src.length() >= start )
	    target += src.subChain(start, src.length());

	// cout << "Target 5 = " << target << endl;
	
        return replacements;
    }
    throw Exception(EXLOC, "Replacer not prepared"); 
}


/* Original Code */


/*   
int regex_replace(char **str, const char *pattern, const char *replace) {
    // replaces regex in pattern with replacement observing capture groups
    // *str MUST be free-able, i.e. obtained by strdup, malloc, ...
    // back references are indicated by char codes 1-31 and none of those chars can be used in the replacement string such as a tab.
    // will not search for matches within replaced text, this will begin searching for the next match after the end of prev match
    // returns:
    //   -1 if pattern cannot be compiled
    //   -2 if count of back references and capture groups don't match
    //   otherwise returns number of matches that were found and replaced
    //
    regex_t reg;
    unsigned int replacements = 0;
    // if regex can't compile pattern, do nothing
    if(!regcomp(&reg, pattern, REG_EXTENDED)) {
        size_t nmatch = reg.re_nsub;
        regmatch_t m[nmatch + 1];
        const char *rpl, *p;
        // count back references in replace
        int br = 0;
        p = replace;
        while(1) {
            while(*++p > 31);
            if(*p) br++;
            else break;
        } // if br is not equal to nmatch, leave
        if(br != nmatch) {
            regfree(&reg);
            return -2;
        }
        // look for matches and replace
        char *new;
        char *search_start = *str;
        while(!regexec(&reg, search_start, nmatch + 1, m, REG_NOTBOL)) {
            // make enough room
            new = (char *)malloc(strlen(*str) + strlen(replace));
            if(!new) exit(EXIT_FAILURE);
            *new = '\0';
            strncat(new, *str, search_start - *str);
            p = rpl = replace;
            int c;
            strncat(new, search_start, m[0].rm_so); // test before pattern
            for(int k=0; k<nmatch; k++) {
                while(*++p > 31); // skip printable char
                c = *p;  // back reference (e.g. \1, \2, ...)
                strncat(new, rpl, p - rpl); // add head of rpl
                // concat match
                strncat(new, search_start + m[c].rm_so, m[c].rm_eo - m[c].rm_so);
                rpl = p++; // skip back reference, next match
            }
            strcat(new, p ); // trailing of rpl
            unsigned int new_start_offset = strlen(new);
            strcat(new, search_start + m[0].rm_eo); // trailing text in *str
            free(*str);
            *str = (char *)malloc(strlen(new)+1);
            strcpy(*str,new);
            search_start = *str + new_start_offset;
            free(new);
            replacements++;
        }
        regfree(&reg);
        // adjust size
        *str = (char *)realloc(*str, strlen(*str) + 1);
        return replacements;
    } else {
        return -1;
    }
}

const char test1[] = "before [link->address] some text [link2->addr2] trail[a->[b->c]]";
const char *pattern1 = "\\[([^-]+)->([^]]+)\\]";
const char replace1[] = "<a href=\"\2\">\1</a>";

const char test2[] = "abcabcdefghijklmnopqurstuvwxyzabc";
const char *pattern2 = "abc";
const char replace2[] = "!abc";

const char test3[] = "a1a1a1a2ba1";
const char *pattern3 = "a";
const char replace3[] = "aa";
int main(int argc, char *argv[])
{
    char *str1 = (char *)malloc(strlen(test1)+1);
    strcpy(str1,test1);
    puts(str1);
    printf("test 1 Before: [%s], ",str1);
    unsigned int repl_count1 = regex_replace(&str1, pattern1, replace1);
    printf("After replacing %d matches: [%s]\n",repl_count1,str1);
    free(str1);

    char *str2 = (char *)malloc(strlen(test2)+1);
    strcpy(str2,test2);
    puts(str2);
    printf("test 2 Before: [%s], ",str2);
    unsigned int repl_count2 = regex_replace(&str2, pattern2, replace2);
    printf("After replacing %d matches: [%s]\n",repl_count2,str2);
    free(str2);

    char *str3 = (char *)malloc(strlen(test3)+1);
    strcpy(str3,test3);
    puts(str3);
    printf("test 3 Before: [%s], ",str3);
    unsigned int repl_count3 = regex_replace(&str3, pattern3, replace3);
    printf("After replacing %d matches: [%s]\n",repl_count3,str3);
    free(str3);
}

*/
