import { bannedPhrases, allowedPhrases, domainExt } from './disintermediation';

/**
 * Maps a phrase to a regex pattern that matches the phrase with any amount of
 * whitespace between consecutive characters, to catch cases such as
 * "i n s t a g r a m".
 *
 * A backslash together with the character it escapes (like \( and \)) is kept
 * as a single unit so the escape is never split apart. Other regex constructs
 * (character classes, groups, quantifiers) are not understood and are handled
 * character by character.
 *
 * @param {string} phrase - word or phrase to map to a regex pattern
 * @returns {string} regex pattern ('' for an empty phrase)
 */
const spaceBetweenChars = (phrase) => {
    // Tokenise into "backslash + escaped character" pairs or single characters.
    // The `u` flag keeps surrogate pairs together; `match` returns null when
    // there is nothing to tokenise.
    const tokens = phrase.match(/\\[\s\S]|[\s\S]/gu) ?? [];

    // Optional whitespace goes only *between* tokens: no doubled `[\s]*[\s]*`
    // and no trailing `[\s]*` that would swallow whitespace after the phrase.
    return tokens.join('[\\s]*');
};

// Anchored (whole-string) UK phone number check.
// Accepts a "+44"/"44" prefix or a leading-0 area code (optionally in parentheses),
// followed by digit groups shaped 4+3+3, 3+3+4 or 2+4+4, each optionally separated by a
// single whitespace character, plus an optional "#" suffix of 3 or 4 digits.
export const ukPhoneNumberRegexEdge =
    /^(((\+?44\s?\d{4}|\(?0\d{4}\)?)\s?\d{3}\s?\d{3})|((\+?44\s?\d{3}|\(?0\d{3}\)?)\s?\d{3}\s?\d{4})|((\+?44\s?\d{2}|\(?0\d{2}\)?)\s?\d{4}\s?\d{4}))(\s?\#(\d{4}|\d{3}))?$/;

// Unanchored, global search for UK-style phone numbers. Three alternatives, tried in order:
export const ukPhoneNumberRegex = new RegExp(
    [
        // Optional 2-digit prefix (with optional "+"), a 4-digit block (bare or in
        // parentheses), then either 3+3 digits or 2+4+4 digits with optional whitespace
        String.raw`(\+?\d{2}\s?)?(\d{4}|\(\d{4}\))(\s?\d{3}\s?\d{3}|\s?\d{2}\s?\d{4}\s?\d{4})`,
        // 10 or 11 digits wrapped in parentheses
        String.raw`\(\d{10,11}\)`,
        // "+" followed by any run of word characters (letters, digits or underscore)
        String.raw`\+(\w+)`,
    ].join('|'),
    'g'
);

// Phone numbers: global search built from four alternatives, tried in order.
export const phoneRegex = new RegExp(
    [
        // Nine or more groups of 1-3 digits, each optionally followed by a dash or
        // whitespace, ending in one more digit
        String.raw`(\b\d{1,3}[-\s]?){9,}\d\b`,
        // Optional 2-4 digit prefix (with optional "+"), a 2-4 digit block (bare or in
        // parentheses), then two or three further 2-4 digit blocks
        String.raw`(\+?\d{2,4}\s?)?(\(\d{2,4}\)|\d{2,4})(\s?\d{2,4}){2,3}\b`,
        // A digit, nine or more "-digit" pairs, then a final digit
        String.raw`(\b\d(-\d){9,}\d\b)`,
        // "0" plus four digits, then two whitespace-separated groups of three digits
        String.raw`(\b0\d{4}\s\d{3}\s\d{3}\b)`,
    ].join('|'),
    'g'
);

// Standalone run of five or more consecutive digits (word boundary on both sides)
export const longNumberRegex = new RegExp('\\b\\d{5,}\\b', 'g');

// Numbers separated by special characters, excluding date and time formats.
// Two negative lookaheads guard the start of a match:
//   - the first rejects a position that begins with a standalone 2- or 4-digit group
//     (as in dd/mm/yy or dd/mm/yyyy);
//   - the second rejects an hh:mm time (hours 0-24, minutes 00-59).
// The matched body is one or more repetitions of "digits, separator(s), digits, separator",
// where a separator is any character that is not a word character, `|` or whitespace.
// NOTE(review): inside `[^\w|\s]` the `|` is a literal pipe, not alternation, so a pipe
// is never treated as a separator — confirm this is intended.
export const digitsSeparatedRegex =
    /(?!\d{4}\b|\d{2}\b)(?!\b([0-1]?[0-9]|2[0-4]):([0-5][0-9])\b)\b(?:\d+[^\w|\s]+\d+[^\w|\s])+\b/g;

// Two or more spelled-out numbers ("zero" to "ten") in a row, separated by whitespace.
// The first word is captured; the following words are matched without capturing.
export const numberWordRegex = (() => {
    const spelledNumbers = [
        'zero',
        'one',
        'two',
        'three',
        'four',
        'five',
        'six',
        'seven',
        'eight',
        'nine',
        'ten',
    ].join('|');

    return new RegExp(
        `\\b(${spelledNumbers})(?:\\s+(?:${spelledNumbers}))+\\b`,
        'gi'
    );
})();

// Instagram tags: "@", optional whitespace, then a run of letters
export const instagramRegex = new RegExp('@[\\s]*[a-zA-Z]+', 'gi');

// Spelled-out digit ("zero" to "nine") followed, within at most 15 non-digit characters,
// by a run of numeric digits. The 15-character cap avoids matching normal sentences while
// still matching "one then 2" or "one followed by 2".
// A negative lookahead skips words directly followed (after optional whitespace) by
// courses, options, or, and, people, guests, desserts, mains, sides or starters.
// NOTE(review): the date alternative in that lookahead (\d{1,2}/\d{1,2}...) is tested
// immediately after the word's trailing \b, where a digit cannot occur, so it appears
// never to exclude anything — confirm whether a leading \s* was intended.
export const wordNumberRegex =
    /\b(zero|one|two|three|four|five|six|seven|eight|nine)\b(?!\d{1,2}[\/]\d{1,2}(?:[\/]\d{2,4})?|\s*(?:courses?|options?|or|and|people|guests?|desserts?|mains?|sides?|starters?))\D{0,15}\d+/gi;
// Spelled-out digit ("zero" to "nine") immediately followed by numeric digits, e.g. "seven7"
export const mixedNumberWordRegex = new RegExp(
    `(?:${[
        'zero',
        'one',
        'two',
        'three',
        'four',
        'five',
        'six',
        'seven',
        'eight',
        'nine',
    ].join('|')})\\d+`,
    'gi'
);

// Alternations built verbatim from the imported phrase lists.
const allowedAlternation = allowedPhrases.join('|');
// Banned phrases are NOT expanded with spaceBetweenChars: per the original note, doing
// so caused "Michelin star" to be flagged as "insta".
const bannedAlternation = bannedPhrases.join('|');

// At a word boundary, refuse to start on an allowed phrase, then require one of the
// banned phrases ending on a word boundary.
const bannedPhrasesPattern = `\\b(?!${allowedAlternation})\\b(?:${bannedAlternation})\\b`;
export const bannedPhrasesRegex = new RegExp(bannedPhrasesPattern, 'gi');

// Email regex: local part, "@", domain, then a top-level domain of two or more letters.
// The TLD class is written [A-Za-z]: inside a character class `|` is a literal pipe, not
// alternation, so including it would let "|" count as part of the address.
export const emailRegex =
    /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;

// Website regex (matches any website except yhangry.com)
// Shape: optional scheme (http, https, ftp or smtp), optional "www.", one [a-z0-9]+ label,
// a dot, one of the imported domainExt endings, then optional path segments.
// The leading negative lookahead skips a position that starts with yhangry.com (with
// optional scheme / "www.") or where the rest of the line contains "maps." or "/maps/".
// Every backslash must be doubled inside this template literal: a single `\.` is cooked
// to a bare `.`, which would turn the dot in "yhangry.com" into a match-anything wildcard.
export const websiteRegex = new RegExp(
    `\\b(?!(https?:\\/\\/)?(www\\.)?yhangry\\.com|.*maps\\..*|.*\\/maps\\/.*)((https?|ftp|smtp):\\/\\/)?(www\\.)?[a-z0-9]+\\.(?:${domainExt.join(
        '|'
    )}\\b)(\\/[a-zA-Z0-9#?&]+\\/?)*`,
    'gi'
);

export const consolidatedPhoneNumberRegex = new RegExp(
    '(\\+44\\s?\\d{2,4}\\s?\\d{2,5}(\\s?\\d{2,5})?)' + // UK phone numbers with +44 (partial and complete)
        '|(\\(?0\\d{2,4}\\)?\\s?\\d{2,5}(\\s?\\d{2,5})?)' + // UK phone numbers without +44 (partial and complete)
        '|(\\b\\d{1,3}[-\\s]?){9,}\\d\\b' + // General phone numbers
        '|(\\b\\d(-\\d){9,}\\d\\b)' + // Numbers with dashes
        '|\\(\\d{10,11}\\)' + // UK phone numbers in parentheses
        '|\\b0\\d{4}\\s\\d{3}\\s\\d{3}\\b', // Additional pattern for UK phone numbers
    'gi'
);

// Collection of detection patterns from this module, keyed by their export names.
// ukPhoneNumberRegexEdge, ukPhoneNumberRegex and phoneRegex are exported individually
// but are not part of this collection.
// Every pattern listed here carries the `g` flag, so `.test()` / `.exec()` are stateful
// through `lastIndex` when the same RegExp object is reused.
export const regexes = {
    websiteRegex,
    wordNumberRegex,
    mixedNumberWordRegex,
    consolidatedPhoneNumberRegex,
    emailRegex,
    numberWordRegex,
    longNumberRegex,
    digitsSeparatedRegex,
    bannedPhrasesRegex,
    instagramRegex,
};
