#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Detect English module
"""
https://www.nostarch.com/crackingcodes (BSD Licensed)
To use, type this code:
>>> import detectEnglish
>>> detectEnglish.isEnglish(someString) # returns True or False
(There must be a "dictionary.txt" file in this directory with all English
words in it, one word per line. You can download this from
https://invpy.com/dictionary.txt)
"""

from typing import Dict

# The 26 uppercase letters of the English alphabet.
UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Every character that counts as "text": both letter cases plus space, tab
# and newline.
LETTERS_AND_SPACE = "".join((UPPERLETTERS, UPPERLETTERS.lower(), " \t\n"))


# The word list is stored as dict keys so membership tests are fast hash
# lookups rather than linear scans of a list. A set would express the same
# intent more directly, since the values are never used.
def loadDictionary() -> Dict[str, None]:
    """Load the word list from "dictionary.txt" into a lookup table.

    The file is opened relative to the current working directory and is
    expected to hold one word per line.

    Returns:
        A dict whose keys are the lines of the file; every value is None.
        Only the keys are meaningful. The dict form is kept so the return
        type stays the same for existing callers.

    Raises:
        OSError: If "dictionary.txt" cannot be opened or read.
    """
    # The context manager closes the file even if reading raises.
    with open("dictionary.txt") as dictionaryFile:
        contents = dictionaryFile.read()
    # Split on "\n" exactly as before so the resulting keys are unchanged.
    return dict.fromkeys(contents.split("\n"))


# Word lookup table, built once when this module is imported. Because
# loadDictionary() opens "dictionary.txt" by relative path, the file must be
# in the current working directory at import time.
ENGLISH_WORDS: Dict[str, None] = loadDictionary()


def getEnglishCount(message: str) -> float:
    """Return the fraction of words in *message* found in ENGLISH_WORDS.

    The message is upper-cased and stripped of everything except letters and
    whitespace, then split on whitespace into candidate words.

    Returns:
        A value between 0.0 and 1.0; 0.0 when the message has no words.
    """
    candidates = removeNonLetters(message.upper()).split()
    if not candidates:
        # Nothing to look up, and this avoids dividing by zero below.
        return 0.0
    hits = sum(1 for token in candidates if token in ENGLISH_WORDS)
    return hits / len(candidates)


def removeNonLetters(message: str) -> str:
    """Return *message* with every character not in LETTERS_AND_SPACE removed.

    The kept characters stay in their original order.
    """
    return "".join(char for char in message if char in LETTERS_AND_SPACE)


def isEnglish(
    message: str, wordPercentage: int = 20, letterPercentage: int = 85
) -> bool:
    """Decide whether *message* looks like English text.

    Two conditions must both hold:

    * at least ``wordPercentage`` percent of the words are dictionary words
      (as measured by getEnglishCount), and
    * at least ``letterPercentage`` percent of the characters are letters or
      whitespace (as kept by removeNonLetters), not punctuation or digits.

    Args:
        message: The text to classify.
        wordPercentage: Minimum percentage (0-100) of dictionary words.
        letterPercentage: Minimum percentage (0-100) of letter/space chars.

    Returns:
        True if both thresholds are met, otherwise False. An empty message
        is never considered English.
    """
    if not message:
        # An empty message has no characters, so the letter percentage
        # below would divide by zero.
        return False
    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
    numLetters = len(removeNonLetters(message))
    messageLettersPercentage = numLetters / len(message) * 100
    lettersMatch = messageLettersPercentage >= letterPercentage
    return wordsMatch and lettersMatch