/***************************************************************************/
/* 		This code is part of WWW graber called pavuk		   */
/*		Copyright (c) 1997,1998,1999 Ondrejicka Stefan		   */
/*		(ondrej@idata.sk)					   */
/*		Distributed under GPL 2 or later			   */
/***************************************************************************/

#include "config.h"

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef HAVE_FNMATCH
#include <fnmatch.h>
#else
#include "fnmatch.h"
#endif

#include "lfname.h"
#include "url.h"
#include "tools.h"

/*
 * Find the n-th occurrence of character chr in str, counting from the
 * beginning of the string (n is expected to be >= 1).
 *
 * Returns a pointer to that occurrence, or NULL when str contains fewer
 * than n occurrences or n is negative.  For n == 0 nothing is scanned
 * and the result is str - 1, exactly as computed by the expression below.
 */
static char *_strfindnchr(str , chr , n)
char *str;
int chr;
int n;
{
	char *cur = str;
	int seen = 0;

	while (*cur && seen < n)
	{
		if (*cur == chr) seen++;
		cur++;
	}

	/* cur has been advanced one past the character that completed the count */
	return (seen == n) ? cur - 1 : NULL;
}

/*
 * Find the n-th occurrence of character chr in str, counting backwards
 * from the end of the string.
 *
 * Returns a pointer to that occurrence, or NULL when str contains fewer
 * than n occurrences or n is negative.  For n == 0 nothing is scanned
 * and the result points at the terminating NUL of str.
 */
static char *_strrfindnchr(str , chr , n)
char *str;
int chr;
int n;
{
	char *cur = str + strlen(str);
	int seen = 0;

	/* cur always sits one past the next character to be examined */
	while (cur > str && seen < n)
	{
		cur--;
		if (*cur == chr) seen++;
	}

	return (seen == n) ? cur : NULL;
}


/*
 * Macros recognized in the translation template (transstr) of a rule:
 *
 * $x  - x-th match section (regex rules only; $0 is the whole match)
 * %i  - protocol id
 * %p  - password
 * %u  - user name
 * %h  - host name
 * %m  - domain name (host name after its first dot)
 * %r  - port number
 * %t  - whole doc path (directory and name)
 * %d  - doc path
 * %n  - doc name
 * %b  - base name of document
 * %e  - extension
 * %s  - search string
 * %-x - x-th dirname from end (counted from 0: %-0 is the last one)
 * %x  - x-th dirname from start (counted from 1)
 *
 * \c copies the character c literally.  The last character of the
 * template and any unrecognized %c sequence are copied unchanged.
 */

/*
 * Append characters of src at dst without ever writing past end.
 *
 * dst - current end of the destination string (where its NUL is/goes)
 * end - last byte of the destination buffer, reserved for the NUL
 * src - text to append; NULL appends nothing
 * len - maximum number of characters to take from src, negative for
 *       "up to the NUL of src"
 *
 * The result is always NUL terminated; text that does not fit is
 * silently dropped.  Returns the new end of the destination string.
 */
static char *lfname_bappend(dst , end , src , len)
char *dst;
char *end;
char *src;
int len;
{
	if (src)
	{
		while (*src && len != 0 && dst < end)
		{
			*dst++ = *src++;
			if (len > 0) len--;
		}
	}
	*dst = '\0';
	return dst;
}

/*
 * Build the local file name for an URL by expanding the translation
 * template of a rule (see the macro list above).
 *
 * urlp    - parsed URL the name is generated for
 * urlstr  - string form of the URL; the $x offsets index into it, so it
 *           is presumably the string last given to lfname_match() for
 *           this rule - TODO confirm against callers
 * lfnamep - rule whose transstr is expanded
 *
 * Returns the expanded name as a copy made by new_string().  Both the
 * intermediate document path and the result are limited to 4095
 * characters; anything longer is truncated instead of overflowing the
 * work buffers.
 */
char *lfname_get_by_url(urlp , urlstr , lfnamep)
url *urlp;
char *urlstr;
lfname *lfnamep;
{
	char *ps,*pd,*pe,*pp,*p1,*p2;
	char pom[4096];
	char pstr[4096];
	char num[32];
	int nr;
	char *n,*d,*t,*e,*b,*m;

	/* document path, with the index name added for directory-like URLs */
	pe = pstr + sizeof(pstr) - 1;
	p1 = url_get_path(urlp);
	if (urlp->type == URLT_GOPHER)
	{
		pstr[0] = '/';
		pstr[1] = (char) urlp->p.gopher.type;
		pstr[2] = '\0';
		pp = lfname_bappend(pstr + 2 , pe , urlp->p.gopher.selector , -1);
		if (urlp->p.gopher.type == '1')
		{
			pp = lfname_bappend(pp , pe , "/" , -1);
			lfname_bappend(pp , pe , cfg.index_name , -1);
		}
	}
	else if (tl_is_dirname(p1) ||
		((urlp->type == URLT_FTP || urlp->type == URLT_FTPS) &&
		 urlp->p.ftp.dir))
	{
		pp = lfname_bappend(pstr , pe , p1 , -1);
		pp = lfname_bappend(pp , pe , "/" , -1);
		lfname_bappend(pp , pe , cfg.index_name , -1);
	}
	else lfname_bappend(pstr , pe , p1 , -1);

	t = get_abs_file_path(pstr);

	lfname_bappend(pstr , pe , t , -1);

	/* split the path into directory (d), name (n), extension (e), base name (b) */
	p1 = strrchr(pstr , '/');

	d = p1 ? new_n_string(pstr , p1 - pstr) : new_string("");

	n = p1 ? new_string(p1+1) : new_string(pstr);

	e = new_string(get_extension(pstr));

	p1 = strrchr(n , '.');

	if (p1) b = new_n_string(n , p1 - n);
	else b = new_string(n);

	/* domain = host name without its first label; m is not owned here */
	m = url_get_site(urlp);
	if (m && (p1 = strchr(m , '.'))) m = p1+1;

	pom[0] = '\0';
	pd = pom;
	pe = pom + sizeof(pom) - 1;

	for (ps = lfnamep->transstr ; *ps ; ps++)
	{
		/* the final character is always literal, even if it is \, $ or % */
		if (!*(ps+1))
		{
			pd = lfname_bappend(pd , pe , ps , 1);
			continue;
		}
		switch (*ps)
		{
			case '\\':
				ps++;
				pd = lfname_bappend(pd , pe , ps , 1);
			break;
#ifdef HAVE_REGEX
			case '$':
				ps++;
				nr = strtol(ps , &pp , 10);
				/*
				 * Match data exist only for regex rules (lfname_new()
				 * sets up preg/pmatch for LFNAME_REGEX only) and an
				 * unmatched group is reported with offset -1.
				 * V8 and BSD regex builds record no sub-match data
				 * here, so $x expands to nothing for them.
				 */
#ifdef HAVE_POSIX_REGEX
				if (lfnamep->type == LFNAME_REGEX &&
				    nr >= 0 && (size_t) nr <= lfnamep->preg.re_nsub &&
				    lfnamep->pmatch[nr].rm_so >= 0 &&
				    lfnamep->pmatch[nr].rm_eo > lfnamep->pmatch[nr].rm_so)
				{
					pd = lfname_bappend(pd , pe ,
						urlstr + lfnamep->pmatch[nr].rm_so ,
						(int) (lfnamep->pmatch[nr].rm_eo - lfnamep->pmatch[nr].rm_so));
				}
#endif
#ifdef HAVE_GNU_REGEX
				/* re_nsub + 1 registers are allocated, so nr == re_nsub is valid */
				if (lfnamep->type == LFNAME_REGEX &&
				    nr >= 0 && (size_t) nr <= lfnamep->preg.re_nsub &&
				    lfnamep->pmatch.start[nr] >= 0 &&
				    lfnamep->pmatch.end[nr] > lfnamep->pmatch.start[nr])
				{
					pd = lfname_bappend(pd , pe ,
						urlstr + lfnamep->pmatch.start[nr] ,
						(int) (lfnamep->pmatch.end[nr] - lfnamep->pmatch.start[nr]));
				}
#endif
				/* the for loop steps onto the first character after the number */
				ps = pp-1;
			break;
#endif
			case '%':
				ps++;
				switch (*ps)
				{
					case 'i':
						pd = lfname_bappend(pd , pe , prottable[urlp->type].dirname , -1);
					break;
					case 'p':
						pd = lfname_bappend(pd , pe , url_get_pass(urlp , NULL) , -1);
					break;
					case 'u':
						pd = lfname_bappend(pd , pe , url_get_user(urlp , NULL) , -1);
					break;
					case 'h':
						pd = lfname_bappend(pd , pe , url_get_site(urlp) , -1);
					break;
					case 'm':
						pd = lfname_bappend(pd , pe , m , -1);
					break;
					case 'r':
						sprintf(num , "%d" , url_get_port(urlp));
						pd = lfname_bappend(pd , pe , num , -1);
					break;
					case 't':
						pd = lfname_bappend(pd , pe , t , -1);
					break;
					case 'd':
						pd = lfname_bappend(pd , pe , d , -1);
					break;
					case 'n':
						pd = lfname_bappend(pd , pe , n , -1);
					break;
					case 'b':
						pd = lfname_bappend(pd , pe , b , -1);
					break;
					case 'e':
						pd = lfname_bappend(pd , pe , e , -1);
					break;
					case 's':
						pd = lfname_bappend(pd , pe , url_get_search_str(urlp) , -1);
					break;
					case '-':
						nr = strtol(ps+1 , &pp , 10);
						/* p1 closes the wanted component, p2 is the '/' in front of it */
						p1 = _strrfindnchr(d , '/' , nr);
						p2 = _strrfindnchr(d , '/' , nr+1);
						if (p1 && p2)
							pd = lfname_bappend(pd , pe , p2+1 , (int) (p1-1-p2));
						ps = pp-1;
					break;
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
					case '8':
					case '9':
						nr = strtol(ps , &pp , 10);
						/* p1 is the '/' in front of the wanted component, p2 the one after it */
						p1 = _strfindnchr(d , '/' , nr);
						p2 = _strfindnchr(d , '/' , nr+1);
						if (p1)
						{
							/* without a following '/' the component runs to the end of d */
							pd = lfname_bappend(pd , pe , p1+1 ,
								p2 ? (int) (p2-1-p1) : -1);
						}
						ps = pp-1;
					break;
					default:
						/* unknown macro: copy the '%' and the character as they are */
						pd = lfname_bappend(pd , pe , ps-1 , 2);
				}
			break;
			default:
				pd = lfname_bappend(pd , pe , ps , 1);
		}
	}
	free(b);
	free(e);
	free(n);
	free(t);
	free(d);
	return new_string(pom);
}

/*
 * Release a rule: the compiled regular expression and its match storage
 * (only for LFNAME_REGEX rules in builds with regex support), both
 * pattern strings and finally the structure itself.
 */
void lfname_free(lfnamep)
lfname *lfnamep;
{
#ifdef HAVE_REGEX
	if (lfnamep->type == LFNAME_REGEX)
	{
#ifdef HAVE_POSIX_REGEX
		_free(lfnamep->pmatch);
		regfree(&(lfnamep->preg));
#endif
#ifdef HAVE_V8_REGEX
		_free(lfnamep->preg);
#endif
#ifdef HAVE_GNU_REGEX
		_free(lfnamep->pmatch.end);
		_free(lfnamep->pmatch.start);
		regfree(&lfnamep->preg);
#endif
	}
#endif

	/* the strings are released before the structure that holds them */
	_free(lfnamep->transstr);
	_free(lfnamep->matchstr);
	_free(lfnamep);
}

/*
 * Create a new local-filename rule.
 *
 * type - kind of rule; for LFNAME_REGEX (in builds with regex support)
 *        mpt is compiled as a regular expression
 * mpt  - match pattern, copied with new_string()
 * str  - translation template, copied with new_string()
 *
 * Returns the new rule, or NULL when the regular expression can not be
 * compiled; the error is reported through xprintf() and nothing stays
 * allocated in that case.
 */
lfname *lfname_new(type , mpt , str)
lfname_type type;
char *mpt;
char *str;
{
	lfname *rv;
	char *p;

	rv = _malloc(sizeof(lfname));
	rv->type = type;
#ifdef HAVE_REGEX
	if (type == LFNAME_REGEX)
	{
#ifdef HAVE_POSIX_REGEX
		int ec;
		if ((ec = regcomp(&(rv->preg) , mpt , REG_EXTENDED)))
		{
			char pom[PATH_MAX];
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , mpt);
			regerror(ec , &(rv->preg) , pom , sizeof(pom));
			xprintf(0 , "%s\n" , pom);
			/* no regfree(): the content of preg is undefined after a failed regcomp() */
			free(rv);
			return NULL;
		}
		rv->pmatch = _malloc((rv->preg.re_nsub + 1) * sizeof(regmatch_t));
#endif
#ifdef HAVE_V8_REGEX
		if (!(rv->preg = regcomp(mpt)))
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , mpt);
			/* rv->preg is NULL here, only the rule itself has to go */
			free(rv);
			return NULL;
		}
#endif
#ifdef HAVE_BSD_REGEX
		if ((p = re_comp(mpt)))
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , mpt);
			/* never use the library message as a format string */
			xprintf(0 , "%s\n" , p);
			free(rv);
			return NULL;
		}
#endif
#ifdef HAVE_GNU_REGEX
		/* these fields have to be initialized before re_compile_pattern() */
		rv->preg.allocated = 0;
		rv->preg.buffer = NULL;
		rv->preg.fastmap = NULL;
		rv->preg.translate = NULL;
		re_set_syntax(r_2phase_star);
		if ((p = re_compile_pattern(mpt, strlen(mpt) , &rv->preg)))
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , mpt);
			xprintf(0 , "%s\n", p);
			regfree(&(rv->preg));
			free(rv);
			return NULL;
		}
		/* fixed-size registers: one for the whole match plus one per group */
		rv->pmatch.start = _malloc((rv->preg.re_nsub + 1) * sizeof(*rv->pmatch.start));
		rv->pmatch.end = _malloc((rv->preg.re_nsub + 1) * sizeof(*rv->pmatch.end));
		rv->pmatch.num_regs  = rv->preg.re_nsub + 1;
		rv->preg.regs_allocated = REGS_FIXED;
#endif
	}
#endif
	rv->matchstr = new_string(mpt);
	rv->transstr = new_string(str);
	return rv;
}

/*
 * Test whether an URL string is matched by a rule.
 *
 * lfnamep - rule to test
 * urlstr  - URL in string form
 *
 * LFNAME_REGEX rules (in builds with regex support) are matched with the
 * regular expression backend selected at compile time; with the POSIX
 * and GNU backends the match offsets are stored in lfnamep->pmatch.
 * All other rules are matched with fnmatch() using FNM_PATHNAME.
 *
 * Returns nonzero when urlstr matches, zero otherwise.
 */
int lfname_match(lfnamep , urlstr)
lfname *lfnamep;
char *urlstr;
{
#ifdef HAVE_REGEX
	if (lfnamep->type == LFNAME_REGEX)
	{
#ifdef HAVE_POSIX_REGEX
		return !regexec(&(lfnamep->preg) , urlstr , lfnamep->preg.re_nsub + 1 , lfnamep->pmatch , 0);
#endif
#ifdef HAVE_V8_REGEX
		return regexec(lfnamep->preg , urlstr);
#endif
#ifdef HAVE_BSD_REGEX
		/*
		 * The pattern is recompiled before every match.  A pattern
		 * that fails to compile matches nothing, and only a result
		 * of 1 from re_exec() is a match (it may report an error
		 * with a negative value).
		 */
		if (re_comp(lfnamep->matchstr)) return 0;
		return re_exec(urlstr) == 1;
#endif
#ifdef HAVE_GNU_REGEX
		return re_match(&(lfnamep->preg), urlstr, strlen(urlstr), 0 , &lfnamep->pmatch) >= 0;
#endif
	}
#endif
	return !fnmatch(lfnamep->matchstr , urlstr , FNM_PATHNAME);
}

/*
 * Check that a match pattern is usable for the given rule type.
 *
 * type - kind of rule the pattern is meant for
 * str  - pattern to check
 *
 * For LFNAME_REGEX (in builds with regex support) the pattern is test
 * compiled with the regular expression backend selected at compile time
 * and a compile error is reported through xprintf().  Patterns of any
 * other type are accepted without inspection.
 *
 * Returns nonzero when the pattern is acceptable, zero otherwise.
 */
int lfname_check_pattern(type , str)
lfname_type type;
char *str;
{
#ifdef HAVE_REGEX
	if (type == LFNAME_REGEX)
	{
#ifdef HAVE_POSIX_REGEX
		int ec;
		char pom[PATH_MAX];
		regex_t preg;

		ec = regcomp(&preg , str , REG_EXTENDED);

		if (ec)
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , str);
			regerror(ec , &preg , pom , sizeof(pom));
			xprintf(0 , "%s\n" , pom);
			/* no regfree(): the content of preg is undefined after a failed regcomp() */
			return 0;
		}
		regfree(&preg);
		return 1;
#endif
#ifdef HAVE_V8_REGEX
		regexp *preg;
		int ok;

		preg = regcomp(str);

		/* remember the outcome before the pointer is released */
		ok = (preg != NULL);
		if (!ok)
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , str);
		_free(preg);
		return ok;
#endif
#ifdef HAVE_BSD_REGEX
		char *p;

		p = re_comp(str);

		if (p)
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , str);
			/* never use the library message as a format string */
			xprintf(0 , "%s\n" , p);
		}
		return p == NULL;
#endif
#ifdef HAVE_GNU_REGEX
		char *p;
		struct re_pattern_buffer preg;

		/* these fields have to be initialized before re_compile_pattern() */
		preg.allocated = 0;
		preg.buffer = NULL;
		preg.fastmap = NULL;
		preg.translate = NULL;

		if ((p = re_compile_pattern(str, strlen(str) , &preg)))
		{
			xprintf(0 , gettext("Error compiling regular expression : %s\n") , str);
			xprintf(0 , "%s\n" , p);
		}
		regfree(&preg);
		return p == NULL;
#endif
	}
#endif
	return TRUE;
}

