import { distance } from "fastest-levenshtein"

// Terms stripped from chain names by `formatChainNamePrimary` before the substring match.
const replaceForExactMatch: string[] = [
	"kjede",
	"-",
]

// Terms additionally stripped by `formatChainNameSecondary` before the fuzzy match.
// NOTE(review): the terms are removed case-insensitively, so "og" and "Og" are equivalent here.
const replaceForGeneralMatch: string[] = [
	"&",
	"og",
	"Og",
	"'",
	"AS",
]

/**
 * Removes every occurrence of the given terms from a chain name.
 *
 * Terms are matched as literal text, case-insensitively, and anywhere in the
 * string (not only on word boundaries), so a term is also removed when it is
 * part of a longer word. Whitespace left between words is kept; only leading
 * and trailing whitespace is trimmed.
 *
 * @param chainName - The chain name to clean up.
 * @param array - Literal terms to remove. Empty terms are ignored.
 * @returns The chain name with all terms removed and the ends trimmed.
 */
const formatChainNameFromArray = (chainName: string, array: string[]) => {
	// Escape regex metacharacters so terms such as "." or "+" are matched literally
	// instead of being interpreted as (possibly invalid) pattern syntax.
	const escapedTerms = array
		.filter(term => term !== "")
		.map(term => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))

	// Nothing to remove: avoid building a pattern that only matches the empty string
	if (escapedTerms.length === 0) return chainName.trim()

	return chainName.replace(new RegExp(escapedTerms.join("|"), "gi"), "").trim()
}

// Strips the `replaceForExactMatch` terms from a chain name (first formatting pass).
const formatChainNamePrimary = (chainName: string): string => {
	return formatChainNameFromArray(chainName, replaceForExactMatch)
}
// Strips the `replaceForGeneralMatch` terms from a chain name (second formatting pass).
const formatChainNameSecondary = (chainName: string): string => {
	return formatChainNameFromArray(chainName, replaceForGeneralMatch)
}

// The `linkStoreNameToChain` function matches a given store name to a chain name from a list of chain names.
// It first looks for a direct substring match between the store name and a chain name. If none is found,
// it scores every chain name by the Levenshtein distance between individual words and picks the most similar one.

/**
The function is intended to work well for most cases, including:
- Extra spaces in the store name
- Typos in the store name
- Different capitalization
- Extra words, such as a location, in the store name
- Misspellings in the store name
- A missing location in the store name
- Special characters in the store name
- Extra punctuation in the store name
- A different word order in the store name
*/

/**
 * Links a store name to the best matching chain name.
 *
 * Matching runs in two phases:
 * 1. Substring match against the chain names after `formatChainNamePrimary`:
 *    the chain is contained in the store name, the store name is contained in
 *    the chain, or the same containment holds after removing spaces, dots and
 *    hyphens and lower-casing both sides. Each rule is tried over all chains
 *    before the next rule is considered.
 * 2. Fuzzy match against the chain names after `formatChainNameSecondary`:
 *    for every chain word the smallest Levenshtein distance to any store name
 *    word is taken and normalized by the chain word length. The average of
 *    these values, divided by the abbreviation score, is the chain's score
 *    (lower is better).
 *
 * @param storeNameValue - Store name to link. It is converted with `toString()`,
 *   so non-string runtime values are tolerated.
 * @param chainNames - Candidate chain names.
 * @returns The matching entry of `chainNames` (in its original, unformatted
 *   form), or `""` when no chain matches well enough.
 */
export const linkStoreNameToChain = (storeNameValue: string, chainNames: string[]): string => {
	const storeName = storeNameValue?.toString()
	// A whitespace-only name carries no information and would otherwise be
	// "contained" in any chain name that has a space in it
	if (!storeName || storeName.trim() === "") return ""

	const formattedChainNamesPrimary = chainNames.map(formatChainNamePrimary)

	// Phase 1: substring matches, from strictest to most lenient
	const compactStoreName = storeName.replace(/[ .-]/g, "").toLowerCase()
	const substringMatchers: Array<(chain: string) => boolean> = [
		// Case sensitive: chain name appears in the store name
		chain => storeName.includes(chain),
		// Case sensitive: store name appears in the chain name
		chain => chain.includes(storeName),
		// Case insensitive, ignoring spaces and punctuation. Skipped when the
		// store name has no characters left, since "" is included in every string.
		chain =>
			compactStoreName !== "" &&
			chain
				.replace(/[ .]/g, "")
				.toLowerCase()
				.includes(compactStoreName),
	]
	for (const matches of substringMatchers) {
		// A chain that is empty after formatting is contained in every store name, so it never counts
		const matchIndex = formattedChainNamesPrimary.findIndex(
			chain => chain !== "" && matches(chain)
		)
		if (matchIndex !== -1) return chainNames[matchIndex] ?? ""
	}

	// Phase 2: fuzzy match on individual words
	// Removing terms from a name can leave repeated spaces; the resulting empty
	// words must be dropped because they would be normalized by a length of 0.
	const toWords = (value: string) => value.split(" ").filter(word => word !== "")

	const formattedChainNamesSecondary = formattedChainNamesPrimary.map(formatChainNameSecondary)
	const lowerCaseStoreName = storeName.toLowerCase().replace(/\./g, "")
	const storeNameWords = toWords(lowerCaseStoreName)

	// A chain needs at least one word whose normalized distance is at or below this value
	const wordThreshold = 0.1
	const similarityScores = formattedChainNamesSecondary.map(chain => {
		const chainWords = toWords(chain.toLowerCase())
		// Nothing to compare
		if (chainWords.length === 0 || storeNameWords.length === 0) return Infinity

		// For each chain word: the distance to the closest store name word, normalized by chain word length
		const bestWordScores = chainWords.map(chainWord => {
			const distances = storeNameWords.map(storeNameWord => distance(storeNameWord, chainWord))
			return Math.min(...distances) / chainWord.length
		})

		// Reject the chain when every word is a bad match, to avoid false positives
		const allWordsAreBadMatches = bestWordScores.every(score => score > wordThreshold)
		if (allWordsAreBadMatches) return Infinity

		// Average word score, improved (lowered) when the store name matches the chain's abbreviation
		const wordScore = bestWordScores.reduce((a, b) => a + b, 0) / bestWordScores.length
		return wordScore / calculateAbbreviationScore(lowerCaseStoreName, chain)
	})

	// `Math.min()` of an empty list is Infinity, which is rejected below as well
	const bestScore = Math.min(...similarityScores)

	// Remove obvious bad matches
	if (!Number.isFinite(bestScore) || bestScore > 4) return ""

	// Return the original chain name that belongs to the lowest score
	return chainNames[similarityScores.indexOf(bestScore)] ?? ""
}

/**
 * Builds a lowercase abbreviation from the first character of each word in a chain name.
 *
 * @param chainName - Chain name whose words are separated by spaces.
 * @returns The abbreviation (e.g. "boe" for "Boys of Europe"), or `false`
 *   when the name has fewer than three words.
 */
const getAbbreviation = (chainName: string) => {
	// Repeated spaces produce empty strings when splitting; they are not words
	const words = chainName.split(" ").filter(word => word !== "")
	// Only abbreviate chains that have three or more words
	if (words.length <= 2) return false
	return words
		.map(word => word.charAt(0))
		.join("")
		.toLowerCase()
}

// This is used to match abbreviations like "BoE" to "Boys of Europe"
/**
 * Scores how strongly a store name matches the abbreviation of a chain name.
 *
 * @param store - Store name; it is lower-cased and stripped of hyphens, spaces and dots before comparing.
 * @param chain - Chain name to abbreviate with `getAbbreviation`.
 * @returns The number of words in the chain name when the store name contains
 *   the chain's abbreviation (or the abbreviation contains the store name),
 *   otherwise 1.
 */
const calculateAbbreviationScore = (store: string, chain: string) => {
	const chainAbbreviation = getAbbreviation(chain)
	// The chain has no abbreviation
	if (!chainAbbreviation) return 1

	const processedStore = store.toLowerCase().replace(/[-. ]/g, "")
	// An empty string is included in every string and would match any abbreviation
	if (processedStore === "") return 1

	const abbreviationMatch =
		processedStore.includes(chainAbbreviation) || chainAbbreviation.includes(processedStore)
	if (!abbreviationMatch) return 1

	// Count real words only; repeated separators would otherwise inflate the score
	const chainWords = chain
		.toLowerCase()
		.replace(/-/g, " ")
		.split(" ")
		.filter(word => word !== "")

	return chainWords.length
}
