/*
 * Decompiled with CFR 0.152.
 */
package nu.validator.checker;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.cybozu.labs.langdetect.Language;
import com.ibm.icu.util.ULocale;
import io.mola.galimatias.GalimatiasParseException;
import io.mola.galimatias.Host;
import io.mola.galimatias.URL;
import jakarta.servlet.http.HttpServletRequest;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import nu.validator.checker.Checker;
import nu.validator.checker.LocatorImpl;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

public class LanguageDetectingChecker
extends Checker {
    // Classpath resource read by the static initializer: one language per line,
    // with the language tag in the first tab-separated column.
    private static final String languageList = "nu/validator/localentities/files/language-profiles-list.txt";
    // Classpath prefix to which a language tag is appended to locate its profile resource.
    private static final String profilesDir = "nu/validator/localentities/files/language-profiles/";
    // Top-level domain (no leading dot) -> language codes for which detection is skipped
    // when the document declares one of them. Each array is searched with
    // Arrays.binarySearch and therefore must stay sorted.
    private static final Map<String, String[]> LANG_TAGS_BY_TLD = new HashMap<String, String[]>();
    // System id of the document as reported by the document locator in startDocument().
    private String systemId;
    // Text after the last '.' of the document's host name; empty when not derived.
    private String tld;
    // Copy of the locator taken at the html start tag; used as the position of warnings.
    private Locator htmlStartTagLocator;
    // Text collected since the last flush performed in endElement().
    private StringBuilder elementContent;
    // Accumulated text that is handed to the language detector in endDocument().
    private StringBuilder documentContent;
    // Value supplied through setHttpContentLanguageHeader(); reset to "" in startDocument().
    private String httpContentLangHeader;
    // Raw value of the lang attribute on the html element ("" when absent).
    private String htmlElementLangAttrValue;
    // Language subtag of htmlElementLangAttrValue as computed by ULocale.getLanguage().
    private String declaredLangCode;
    private boolean hasHtmlElement;
    private boolean htmlElementHasLang;
    // Raw value of the dir attribute on the html element ("" when absent).
    private String dirAttrValue;
    private boolean hasDir;
    // True between the start and end of the body element.
    private boolean inBody;
    // Nesting depth inside an element whose lang differs from the html element's lang.
    private int currentOpenElementsInDifferentLang;
    // Nesting depth inside elements named in SKIP_NAMES; text is only collected while this is below 1.
    private int currentOpenElementsWithSkipName = 0;
    // Number of counted characters seen so far (see characters() for what is excluded).
    private int nonWhitespaceCharacterCount;
    // NOTE(review): the literals 30720, 1024 and 0.9 appear inline elsewhere in this class,
    // presumably these constants inlined by the decompiler — confirm before changing a value.
    private static final int MAX_CHARS = 30720;
    private static final int MIN_CHARS = 1024;
    private static final double MIN_PROBABILITY = 0.9;
    // Detected language codes for which dir="rtl" is recommended. Must stay sorted (binary search).
    private static final String[] RTL_LANGS = new String[]{"ar", "azb", "ckb", "dv", "fa", "he", "pnb", "ps", "sd", "ug", "ur"};
    // Element names treated specially when collecting text. Must stay sorted (binary search).
    private static final String[] SKIP_NAMES = new String[]{"a", "button", "details", "figcaption", "form", "li", "nav", "pre", "script", "select", "span", "style", "summary", "td", "textarea", "th", "tr"};
    // Request obtained from getRequest() in startDocument(); may be null.
    private HttpServletRequest request;

    /**
     * Opens a buffered UTF-8 reader over the classpath resource that holds the
     * detection profile for the given language tag.
     *
     * @param languageTag tag appended to the profiles directory prefix
     * @return a reader positioned at the start of the profile resource
     * @throws UnsupportedEncodingException if the UTF-8 charset name is rejected
     */
    private static BufferedReader profileReaderForLanguageTag(String languageTag) throws UnsupportedEncodingException {
        String resourcePath = profilesDir + languageTag;
        ClassLoader loader = LanguageDetectingChecker.class.getClassLoader();
        InputStreamReader decoded = new InputStreamReader(loader.getResourceAsStream(resourcePath), "UTF-8");
        return new BufferedReader(decoded);
    }

    /**
     * Tells whether text seen right now should be collected for detection:
     * only inside body, outside skipped and differently-tagged elements, and
     * while the character budget has not been used up.
     */
    private boolean shouldAppendToLangdetectContent() {
        if (!this.inBody) {
            return false;
        }
        if (this.currentOpenElementsWithSkipName >= 1) {
            return false;
        }
        if (this.currentOpenElementsInDifferentLang >= 1) {
            return false;
        }
        return this.nonWhitespaceCharacterCount < MAX_CHARS;
    }

    /**
     * Publishes the detected language on the current request, when there is one.
     *
     * @param languageTag the language tag to expose as a request attribute
     */
    private void setDocumentLanguage(String languageTag) {
        if (this.request == null) {
            return;
        }
        this.request.setAttribute("http://validator.nu/properties/document-language", languageTag);
    }

    /**
     * Picks one of Croatian, Serbian or Bosnian for a document that the
     * detector could not tell apart, using the declared language code first
     * and the document's top-level domain second.
     *
     * <p>The TLD is stored without a leading dot (it is the text after the last
     * '.' of the host name), so it is compared against "hr", "rs" and "ba".
     *
     * @return "hr", "sr-latn" or "bs" when a hint is available, otherwise "sh"
     * @throws SAXException never thrown by this implementation; kept for callers
     */
    private String getDetectedLanguageSerboCroatian() throws SAXException {
        if ("hr".equals(this.declaredLangCode) || "hr".equals(this.tld)) {
            return "hr";
        }
        if ("sr".equals(this.declaredLangCode) || "rs".equals(this.tld)) {
            return "sr-latn";
        }
        if ("bs".equals(this.declaredLangCode) || "ba".equals(this.tld)) {
            return "bs";
        }
        // No hint either way: report the generic Serbo-Croatian tag.
        return "sh";
    }

    /**
     * Warns when a document that looks Croatian, Serbian or Bosnian either has
     * no lang attribute on the html element or declares some other language.
     *
     * @throws SAXException if emitting the warning fails
     */
    private void checkLangAttributeSerboCroatian() throws SAXException {
        String declaredValue = this.htmlElementLangAttrValue.toLowerCase();
        if (!this.htmlElementHasLang) {
            this.warn("This document appears to be written in either Croatian, Serbian, or Bosnian. Consider adding either \u201clang=\"hr\"\u201d, \u201clang=\"sr\"\u201d, or \u201clang=\"bs\"\u201d to the \u201chtml\u201d start tag.", this.htmlStartTagLocator);
            return;
        }
        boolean declaredIsAcceptable = "hr".equals(this.declaredLangCode)
                || "sr".equals(this.declaredLangCode)
                || "bs".equals(this.declaredLangCode);
        if (declaredIsAcceptable) {
            return;
        }
        String declaredExpr = this.getAttValueExpr("lang", declaredValue);
        this.warn(String.format("This document appears to be written in either Croatian, Serbian, or Bosnian, but the \u201chtml\u201d start tag has %s. Consider using either \u201clang=\"hr\"\u201d, \u201clang=\"sr\"\u201d, or \u201clang=\"bs\"\u201d instead.", declaredExpr), this.htmlStartTagLocator);
    }

    /**
     * Warns when a document detected as Norwegian either has no lang attribute
     * on the html element or declares a language other than "no", "nn" or "nb".
     *
     * @throws SAXException if emitting the warning fails
     */
    private void checkLangAttributeNorwegian() throws SAXException {
        String lowerCaseLang = this.htmlElementLangAttrValue.toLowerCase();
        String langWarning = "";
        if (!this.htmlElementHasLang) {
            // The first sentence previously lacked its closing period.
            langWarning = "This document appears to be written in Norwegian. Consider adding either \u201clang=\"nn\"\u201d or \u201clang=\"nb\"\u201d (or variant) to the \u201chtml\u201d start tag.";
        } else if (!("no".equals(this.declaredLangCode) || "nn".equals(this.declaredLangCode) || "nb".equals(this.declaredLangCode))) {
            langWarning = String.format("This document appears to be written in Norwegian, but the \u201chtml\u201d start tag has %s. Consider using either \u201clang=\"nn\"\u201d or \u201clang=\"nb\"\u201d (or variant) instead.", this.getAttValueExpr("lang", lowerCaseLang));
        }
        if (!"".equals(langWarning)) {
            this.warn(langWarning, this.htmlStartTagLocator);
        }
    }

    /**
     * Warns when a document detected as Norwegian is served with a
     * single-valued Content-Language header whose language is not "no", "nn"
     * or "nb". Empty headers and comma-separated lists are not checked.
     *
     * @param detectedLanguage unused; kept for signature parity with the generic check
     * @param detectedLanguageName unused; kept for signature parity with the generic check
     * @param detectedLanguageCode unused; kept for signature parity with the generic check
     * @throws SAXException if emitting the warning fails
     */
    private void checkContentLanguageHeaderNorwegian(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode) throws SAXException {
        if ("".equals(this.httpContentLangHeader) || this.httpContentLangHeader.contains(",")) {
            return;
        }
        String lowerCaseContentLang = this.httpContentLangHeader.toLowerCase();
        String contentLangCode = new ULocale(lowerCaseContentLang).getLanguage();
        if (!("no".equals(contentLangCode) || "nn".equals(contentLangCode) || "nb".equals(contentLangCode))) {
            // The suggestion previously named "nn" twice; the second option is "nb",
            // matching the lang-attribute advice for Norwegian.
            this.warn("This document appears to be written in Norwegian but the value of the HTTP \u201cContent-Language\u201d header is \u201c" + lowerCaseContentLang + "\u201d. Consider changing it to \u201cnn\u201d or \u201cnb\u201d (or variant) instead.");
        }
    }

    /**
     * Compares the detected language with the lang attribute of the html
     * element and warns when the attribute is missing or disagrees.
     *
     * <p>A number of detected/declared pairs are tolerated without a warning
     * (see the switch below). For Chinese, the whole lower-cased attribute
     * value is compared with "yue", not just its language subtag.
     *
     * @param detectedLanguage full tag reported by the detector
     * @param detectedLanguageName human-readable name used in messages
     * @param detectedLanguageCode language subtag of the detected tag
     * @param preferredLanguageCode code suggested to the author
     * @throws SAXException if emitting the warning fails
     */
    private void checkLangAttribute(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode, String preferredLanguageCode) throws SAXException {
        String declaredValue = this.htmlElementLangAttrValue.toLowerCase();
        if (!this.htmlElementHasLang) {
            this.warn(String.format("This document appears to be written in %s. Consider adding \u201clang=\"%s\"\u201d (or variant) to the \u201chtml\u201d start tag.", detectedLanguageName, preferredLanguageCode), this.htmlStartTagLocator);
            return;
        }
        // Record what the author declared before any tolerated-pair early exit.
        if (this.request != null) {
            if ("".equals(declaredValue)) {
                this.request.setAttribute("http://validator.nu/properties/lang-empty", true);
            } else {
                this.request.setAttribute("http://validator.nu/properties/lang-value", declaredValue);
            }
        }
        String declared = this.declaredLangCode;
        boolean tolerated;
        switch (detectedLanguageCode == null ? "" : detectedLanguageCode) {
            case "tl":
                tolerated = Arrays.asList("ceb", "ilo", "pag", "war").contains(declared);
                break;
            case "id":
            case "ms":
                tolerated = "min".equals(declared);
                break;
            case "hr":
                tolerated = Arrays.asList("sr", "bs", "sh").contains(declared);
                break;
            case "sr":
                tolerated = Arrays.asList("hr", "bs", "sh").contains(declared);
                break;
            case "bs":
                tolerated = Arrays.asList("hr", "sr", "sh").contains(declared);
                break;
            case "de":
                tolerated = Arrays.asList("bar", "gsw", "lb").contains(declared);
                break;
            case "zh":
                tolerated = "yue".equals(declaredValue);
                break;
            case "el":
                tolerated = "grc".equals(declared);
                break;
            case "es":
                tolerated = Arrays.asList("an", "ast").contains(declared);
                break;
            case "it":
                tolerated = Arrays.asList("co", "pms", "vec", "lmo", "scn", "nap").contains(declared);
                break;
            case "rw":
                tolerated = "rn".equals(declared);
                break;
            case "mhr":
                tolerated = Arrays.asList("chm", "mrj").contains(declared);
                break;
            case "mrj":
                tolerated = Arrays.asList("chm", "mhr").contains(declared);
                break;
            case "ru":
                tolerated = "bg".equals(declared);
                break;
            default:
                tolerated = false;
                break;
        }
        if (tolerated) {
            return;
        }
        boolean mismatch = this.zhSubtagMismatch(detectedLanguage, declaredValue)
                || !this.declaredLangCode.equals(detectedLanguageCode);
        if (!mismatch) {
            return;
        }
        if (this.request != null) {
            this.request.setAttribute("http://validator.nu/properties/lang-wrong", true);
        }
        String template = "This document appears to be written in %s but the \u201chtml\u201d start tag has %s. Consider using \u201clang=\"%s\"\u201d (or variant) instead.";
        String declaredExpr = this.getAttValueExpr("lang", this.htmlElementLangAttrValue);
        this.warn(String.format(template, detectedLanguageName, declaredExpr, preferredLanguageCode), this.htmlStartTagLocator);
    }

    /**
     * Compares the detected language with a single-valued HTTP
     * Content-Language header and warns when they disagree; additionally
     * warns when the header disagrees with the html element's lang attribute.
     * Empty headers and comma-separated lists are not checked.
     *
     * <p>The same detected/declared pairs that are tolerated for the lang
     * attribute are tolerated here and end the method without any warning.
     *
     * @param detectedLanguage full tag reported by the detector
     * @param detectedLanguageName human-readable name used in messages
     * @param detectedLanguageCode language subtag of the detected tag
     * @param preferredLanguageCode code suggested to the author
     * @throws SAXException if emitting a warning fails
     */
    private void checkContentLanguageHeader(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode, String preferredLanguageCode) throws SAXException {
        if ("".equals(this.httpContentLangHeader) || this.httpContentLangHeader.contains(",")) {
            return;
        }
        String headerValue = this.httpContentLangHeader.toLowerCase();
        String headerCode = new ULocale(headerValue).getLanguage();
        boolean tolerated;
        switch (detectedLanguageCode == null ? "" : detectedLanguageCode) {
            case "tl":
                tolerated = Arrays.asList("ceb", "ilo", "pag", "war").contains(headerCode);
                break;
            case "id":
            case "ms":
                tolerated = "min".equals(headerCode);
                break;
            case "hr":
                tolerated = Arrays.asList("sr", "bs", "sh").contains(headerCode);
                break;
            case "sr":
                tolerated = Arrays.asList("hr", "bs", "sh").contains(headerCode);
                break;
            case "bs":
                tolerated = Arrays.asList("hr", "sr", "sh").contains(headerCode);
                break;
            case "de":
                tolerated = Arrays.asList("bar", "gsw", "lb").contains(headerCode);
                break;
            case "zh":
                // Compared against the whole header value, not just its language subtag.
                tolerated = "yue".equals(headerValue);
                break;
            case "el":
                tolerated = "grc".equals(headerCode);
                break;
            case "es":
                tolerated = Arrays.asList("an", "ast").contains(headerCode);
                break;
            case "it":
                tolerated = Arrays.asList("co", "pms", "vec", "lmo", "scn", "nap").contains(headerCode);
                break;
            case "rw":
                tolerated = "rn".equals(headerCode);
                break;
            case "mhr":
                tolerated = Arrays.asList("chm", "mrj").contains(headerCode);
                break;
            case "mrj":
                tolerated = Arrays.asList("chm", "mhr").contains(headerCode);
                break;
            case "ru":
                tolerated = "bg".equals(headerCode);
                break;
            default:
                tolerated = false;
                break;
        }
        if (tolerated) {
            return;
        }
        boolean headerDisagreesWithDetection = this.zhSubtagMismatch(detectedLanguage, headerValue)
                || !headerCode.equals(detectedLanguageCode);
        if (headerDisagreesWithDetection) {
            String template = "This document appears to be written in %s but the value of the HTTP \u201cContent-Language\u201d header is \u201c%s\u201d. Consider changing it to \u201c%s\u201d (or variant).";
            this.warn(String.format(template, detectedLanguageName, headerValue, preferredLanguageCode));
        }
        if (!this.htmlElementHasLang) {
            return;
        }
        String declaredValue = this.htmlElementLangAttrValue.toLowerCase();
        boolean headerDisagreesWithAttribute = this.zhSubtagMismatch(headerValue, declaredValue)
                || !headerCode.equals(this.declaredLangCode);
        if (headerDisagreesWithAttribute) {
            String template = "The value of the HTTP \u201cContent-Language\u201d header is \u201c%s\u201d but it will be ignored because the \u201chtml\u201d start tag has %s.";
            String declaredExpr = this.getAttValueExpr("lang", this.htmlElementLangAttrValue);
            this.warn(String.format(template, this.httpContentLangHeader, declaredExpr), this.htmlStartTagLocator);
        }
    }

    /**
     * For detected languages listed in RTL_LANGS, warns when the html element
     * has no dir attribute or a dir value other than "rtl".
     *
     * @param detectedLanguage unused here
     * @param detectedLanguageName human-readable name used in messages
     * @param detectedLanguageCode language subtag looked up in RTL_LANGS
     * @param preferredLanguageCode unused here
     * @throws SAXException if emitting the warning fails
     */
    private void checkDirAttribute(String detectedLanguage, String detectedLanguageName, String detectedLanguageCode, String preferredLanguageCode) throws SAXException {
        boolean rightToLeft = Arrays.binarySearch(RTL_LANGS, detectedLanguageCode) >= 0;
        if (!rightToLeft) {
            return;
        }
        if (!this.hasDir) {
            this.warn(String.format("This document appears to be written in %s. Consider adding \u201cdir=\"rtl\"\u201d to the \u201chtml\u201d start tag.", detectedLanguageName), this.htmlStartTagLocator);
            return;
        }
        if ("rtl".equals(this.dirAttrValue)) {
            return;
        }
        String declaredExpr = this.getAttValueExpr("dir", this.dirAttrValue);
        this.warn(String.format("This document appears to be written in %s but the \u201chtml\u201d start tag has %s. Consider using \u201cdir=\"rtl\"\u201d instead.", detectedLanguageName, declaredExpr), this.htmlStartTagLocator);
    }

    /**
     * Tells whether a declared Chinese tag names the opposite script or region
     * from the expected one (Simplified vs. Traditional).
     *
     * @param expectedLanguage the tag to check against; only "zh-hans" and "zh-hant" can yield true
     * @param declaredLanguage the lower-cased declared value that is searched for conflicting subtags
     * @return true when the declared value conflicts with the expected script
     */
    private boolean zhSubtagMismatch(String expectedLanguage, String declaredLanguage) {
        if ("zh-hans".equals(expectedLanguage)) {
            return declaredLanguage.contains("zh-tw") || declaredLanguage.contains("zh-hant");
        }
        if ("zh-hant".equals(expectedLanguage)) {
            return declaredLanguage.contains("zh-cn") || declaredLanguage.contains("zh-hans");
        }
        return false;
    }

    /**
     * Formats an attribute for use in a message: either the quoted
     * name="value" pair, or a phrase saying the attribute is empty.
     *
     * @param attName attribute name
     * @param attValue attribute value
     * @return the message fragment describing the attribute
     */
    private String getAttValueExpr(String attName, String attValue) {
        boolean empty = "".equals(attValue);
        return empty
                ? String.format("an empty \u201c%s\u201d attribute", attName)
                : String.format("\u201c%s=\"%s\"\u201d", attName, attValue);
    }

    /**
     * Stores the value of the HTTP Content-Language header for later checks.
     * A null argument leaves the currently stored value untouched.
     *
     * @param httpContentLangHeader header value, or null to keep the current one
     */
    public void setHttpContentLanguageHeader(String httpContentLangHeader) {
        if (httpContentLangHeader == null) {
            return;
        }
        this.httpContentLangHeader = httpContentLangHeader;
    }

    /**
     * Runs language detection when enough text was collected, detection is not
     * switched off via the {@code nu.validator.checker.enableLangDetection}
     * system property, and an html start tag was seen; otherwise only checks
     * for a missing lang attribute.
     *
     * @throws SAXException if emitting a warning fails
     */
    @Override
    public void endDocument() throws SAXException {
        boolean skipDetection = this.documentContent.length() <= MIN_CHARS
                || "0".equals(System.getProperty("nu.validator.checker.enableLangDetection"))
                || this.htmlStartTagLocator == null;
        if (skipDetection) {
            this.warnIfMissingLang();
            return;
        }
        this.detectLanguageAndCheckAgainstDeclaredLanguage();
    }

    /**
     * Suggests adding a lang attribute when the document has an html element
     * without one, unless the {@code nu.validator.checker.ignoreMissingLang}
     * system property is "true".
     *
     * @throws SAXException if emitting the warning fails
     */
    private void warnIfMissingLang() throws SAXException {
        if (!this.hasHtmlElement || this.htmlElementHasLang) {
            return;
        }
        if ("true".equals(System.getProperty("nu.validator.checker.ignoreMissingLang"))) {
            return;
        }
        this.warn("Consider adding a \u201clang\u201d attribute to the \u201chtml\u201d start tag to declare the language of this document.", this.htmlStartTagLocator);
    }

    /**
     * Runs the language detector over the collected document text and checks
     * the result against the html lang attribute, the html dir attribute and
     * the HTTP Content-Language header.
     *
     * Detection is skipped entirely when the declared language is "zxx", "eo"
     * or "la", or when the declared language is one of the languages listed
     * for the document's top-level domain.
     *
     * @throws SAXException if emitting a warning fails
     */
    private void detectLanguageAndCheckAgainstDeclaredLanguage() throws SAXException {
        if ("zxx".equals(this.declaredLangCode) || "eo".equals(this.declaredLangCode) || "la".equals(this.declaredLangCode)) {
            return;
        }
        // Trust the declaration when it matches a language registered for the TLD.
        if (LANG_TAGS_BY_TLD.containsKey(this.tld) && Arrays.binarySearch(LANG_TAGS_BY_TLD.get(this.tld), this.declaredLangCode) >= 0) {
            return;
        }
        try {
            // Collapse every run of whitespace to a single space before detection.
            String textContent = this.documentContent.toString().replaceAll("\\s+", " ");
            String detectedLanguage = "";
            Detector detector = DetectorFactory.create();
            detector.append(textContent);
            // NOTE(review): this result is discarded and the call is repeated two lines
            // below — looks redundant; confirm against the Detector implementation.
            detector.getProbabilities();
            ArrayList<String> possibileLanguages = new ArrayList<String>();
            ArrayList<Language> possibilities = detector.getProbabilities();
            for (Language possibility : possibilities) {
                possibileLanguages.add(possibility.lang);
                // A candidate above the probability threshold is taken as the detected language.
                if (possibility.prob > 0.9) {
                    detectedLanguage = possibility.lang;
                    this.setDocumentLanguage(detectedLanguage);
                    continue;
                }
                // Proceed only once at least two of "hr", "sr-latn" and "bs" have been
                // seen among the candidates so far; otherwise move to the next candidate.
                if ((!possibileLanguages.contains("hr") || !possibileLanguages.contains("sr-latn") && !possibileLanguages.contains("bs")) && (!possibileLanguages.contains("sr-latn") || !possibileLanguages.contains("hr") && !possibileLanguages.contains("bs")) && (!possibileLanguages.contains("bs") || !possibileLanguages.contains("hr") && !possibileLanguages.contains("sr-latn"))) continue;
                // Disambiguate using the declared language or TLD, but only when the html
                // element has a lang attribute or the document has a system id.
                if (this.htmlElementHasLang || this.systemId != null) {
                    detectedLanguage = this.getDetectedLanguageSerboCroatian();
                    this.setDocumentLanguage(detectedLanguage);
                }
                // Only the undecided result "sh" gets the combined check, which ends the method.
                if (!"sh".equals(detectedLanguage)) continue;
                this.checkLangAttributeSerboCroatian();
                return;
            }
            // Nothing detected: fall back to the plain missing-lang check.
            if ("".equals(detectedLanguage)) {
                this.warnIfMissingLang();
                return;
            }
            String detectedLanguageName = "";
            String preferredLanguageCode = "";
            ULocale locale = new ULocale(detectedLanguage);
            String detectedLanguageCode = locale.getLanguage();
            // Norwegian has dedicated checks; the dir attribute is not checked on this path.
            if ("no".equals(detectedLanguage)) {
                this.checkLangAttributeNorwegian();
                this.checkContentLanguageHeaderNorwegian(detectedLanguage, detectedLanguageName, detectedLanguageCode);
                return;
            }
            // Hard-coded display names and suggested codes for specific detector tags;
            // every other tag uses the locale's display name and its language code.
            if ("zh-hans".equals(detectedLanguage)) {
                detectedLanguageName = "Simplified Chinese";
                preferredLanguageCode = "zh-hans";
            } else if ("zh-hant".equals(detectedLanguage)) {
                detectedLanguageName = "Traditional Chinese";
                preferredLanguageCode = "zh-hant";
            } else if ("mhr".equals(detectedLanguage)) {
                detectedLanguageName = "Meadow Mari";
                preferredLanguageCode = "mhr";
            } else if ("mrj".equals(detectedLanguage)) {
                detectedLanguageName = "Hill Mari";
                preferredLanguageCode = "mrj";
            } else if ("nah".equals(detectedLanguage)) {
                detectedLanguageName = "Nahuatl";
                preferredLanguageCode = "nah";
            } else if ("pnb".equals(detectedLanguage)) {
                detectedLanguageName = "Western Panjabi";
                preferredLanguageCode = "pnb";
            } else if ("sr-cyrl".equals(detectedLanguage)) {
                detectedLanguageName = "Serbian";
                preferredLanguageCode = "sr";
            } else if ("sr-latn".equals(detectedLanguage)) {
                detectedLanguageName = "Serbian";
                preferredLanguageCode = "sr";
            } else if ("uz-cyrl".equals(detectedLanguage)) {
                detectedLanguageName = "Uzbek";
                preferredLanguageCode = "uz";
            } else if ("uz-latn".equals(detectedLanguage)) {
                detectedLanguageName = "Uzbek";
                preferredLanguageCode = "uz";
            } else if ("zxx".equals(detectedLanguage)) {
                detectedLanguageName = "Lorem ipsum text";
                preferredLanguageCode = "zxx";
            } else {
                detectedLanguageName = locale.getDisplayName();
                preferredLanguageCode = detectedLanguageCode;
            }
            this.checkLangAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
            this.checkDirAttribute(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
            this.checkContentLanguageHeader(detectedLanguage, detectedLanguageName, detectedLanguageCode, preferredLanguageCode);
        }
        catch (LangDetectException langDetectException) {
            // Detector failures are swallowed: no warning of any kind is emitted for
            // the document in that case. NOTE(review): presumably deliberate
            // best-effort behaviour — confirm.
        }
    }

    /**
     * Flushes text collected for the closing element into the document text
     * and unwinds the nesting counters.
     *
     * <p>Elements outside the XHTML namespace discard the pending text. The
     * namespace is compared with {@code equals} rather than by reference, so
     * the check does not rely on the parser handing out interned strings.
     *
     * @param uri namespace URI of the element
     * @param localName local name of the element
     * @param name qualified name of the element (unused)
     * @throws SAXException declared by the handler contract; not thrown here
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if (!"http://www.w3.org/1999/xhtml".equals(uri)) {
            this.elementContent.setLength(0);
            return;
        }
        boolean skipName = Arrays.binarySearch(SKIP_NAMES, localName) >= 0;
        // Pending text is only promoted while the character budget is not exhausted.
        if (!skipName && this.nonWhitespaceCharacterCount < MAX_CHARS) {
            this.documentContent.append(this.elementContent);
            this.elementContent.setLength(0);
        }
        if ("body".equals(localName)) {
            this.inBody = false;
            this.currentOpenElementsWithSkipName = 0;
        }
        if (this.currentOpenElementsInDifferentLang > 0) {
            // Guarded by the test above, so the counter cannot drop below zero here.
            --this.currentOpenElementsInDifferentLang;
        } else if (skipName) {
            // Text pending at the close of a skipped element is dropped.
            this.elementContent.setLength(0);
            this.currentOpenElementsWithSkipName = Math.max(0, this.currentOpenElementsWithSkipName - 1);
        }
    }

    /**
     * Resets all per-document state and derives the top-level domain from the
     * document's system id when that id starts with "http" and has a host.
     *
     * @throws SAXException declared by the handler contract; not thrown here
     * @throws RuntimeException wrapping the parse failure when the system id cannot be parsed as a URL
     */
    @Override
    public void startDocument() throws SAXException {
        this.request = this.getRequest();

        // Declared metadata.
        this.httpContentLangHeader = "";
        this.tld = "";
        this.htmlStartTagLocator = null;
        this.hasHtmlElement = false;
        this.htmlElementHasLang = false;
        this.htmlElementLangAttrValue = "";
        this.declaredLangCode = "";
        this.hasDir = false;
        this.dirAttrValue = "";

        // Text collection state.
        this.inBody = false;
        this.currentOpenElementsInDifferentLang = 0;
        this.currentOpenElementsWithSkipName = 0;
        this.nonWhitespaceCharacterCount = 0;
        this.elementContent = new StringBuilder();
        this.documentContent = new StringBuilder();

        try {
            this.systemId = this.getDocumentLocator().getSystemId();
            if (this.systemId == null || !this.systemId.startsWith("http")) {
                return;
            }
            Host parsedHost = URL.parse(this.systemId).host();
            if (parsedHost == null) {
                return;
            }
            String hostName = parsedHost.toString();
            // Everything after the last dot; the whole host name when it has no dot.
            this.tld = hostName.substring(hostName.lastIndexOf(".") + 1);
        }
        catch (GalimatiasParseException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Records the html element's lang and dir attributes, tracks entry into
     * body, and maintains the two nesting counters that decide whether text
     * is collected for language detection.
     *
     * <p>Previously this method returned early for every element named in
     * SKIP_NAMES, which made the skip-depth increment at its end unreachable
     * and left the different-language depth unbalanced with endElement (which
     * decrements it for skipped elements too). The counters are now updated
     * as the exact mirror of endElement: an element is counted in the
     * different-language depth when it is nested in, or itself opens, a
     * differently tagged subtree; otherwise, if its name is in SKIP_NAMES, it
     * is counted in the skip depth.
     *
     * @param uri namespace URI of the element
     * @param localName local name of the element
     * @param name qualified name of the element (unused)
     * @param atts attributes of the element
     * @throws SAXException if emitting a warning fails
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
        // equals() instead of a reference comparison: does not depend on interned URIs.
        if (!"http://www.w3.org/1999/xhtml".equals(uri)) {
            return;
        }
        if ("html".equals(localName)) {
            this.hasHtmlElement = true;
            this.htmlStartTagLocator = new LocatorImpl(this.getDocumentLocator());
            for (int i = 0; i < atts.getLength(); ++i) {
                if ("lang".equals(atts.getLocalName(i))) {
                    if (this.request != null) {
                        this.request.setAttribute("http://validator.nu/properties/lang-found", true);
                    }
                    this.htmlElementHasLang = true;
                    this.htmlElementLangAttrValue = atts.getValue(i);
                    try {
                        this.declaredLangCode = new ULocale(this.htmlElementLangAttrValue).getLanguage();
                    }
                    catch (IllegalArgumentException e) {
                        String message = "The \u201chtml\u201d start tag has a malformed value for its \u201clang\u201d attribute.";
                        this.warn(message, this.htmlStartTagLocator);
                    }
                    continue;
                }
                if (!"dir".equals(atts.getLocalName(i))) {
                    continue;
                }
                this.hasDir = true;
                this.dirAttrValue = atts.getValue(i);
            }
            return;
        }
        if ("body".equals(localName)) {
            this.inBody = true;
            return;
        }
        if (this.inBody && (this.currentOpenElementsInDifferentLang > 0 || this.declaresDifferentLang(atts))) {
            ++this.currentOpenElementsInDifferentLang;
        } else if (Arrays.binarySearch(SKIP_NAMES, localName) >= 0) {
            ++this.currentOpenElementsWithSkipName;
        }
    }

    /**
     * Tells whether the attributes carry a lang value different from the
     * html element's non-empty lang value. Stops at the first such attribute
     * so that one element is never counted twice.
     */
    private boolean declaresDifferentLang(Attributes atts) {
        if ("".equals(this.htmlElementLangAttrValue)) {
            return false;
        }
        for (int i = 0; i < atts.getLength(); ++i) {
            if ("lang".equals(atts.getLocalName(i)) && !this.htmlElementLangAttrValue.equals(atts.getValue(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the character run for detection when collection is currently
     * allowed, then advances the character budget.
     *
     * <p>Tab, newline, carriage return, space, '#' and the ASCII digits are
     * never counted. Every other character is counted only while collection
     * is still allowed, which is re-evaluated per character because the count
     * itself is part of that condition.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character of the run
     * @param length number of characters in the run
     * @throws SAXException declared by the handler contract; not thrown here
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.shouldAppendToLangdetectContent()) {
            this.elementContent.append(ch, start, length);
        }
        int end = start + length;
        for (int pos = start; pos < end; ++pos) {
            char current = ch[pos];
            boolean uncounted = current == '\t'
                    || current == '\n'
                    || current == '\r'
                    || current == ' '
                    || current == '#'
                    || (current >= '0' && current <= '9');
            if (!uncounted && this.shouldAppendToLangdetectContent()) {
                ++this.nonWhitespaceCharacterCount;
            }
        }
    }

    /*
     * One-time setup, skipped when the system property
     * nu.validator.checker.enableLangDetection is "0":
     *  1. fills the TLD -> language-codes table (each array must stay sorted
     *     because it is searched with Arrays.binarySearch);
     *  2. reads the list of language tags and loads the first line of each
     *     language profile into the detector factory;
     *  3. applies an optional detection seed from the system property
     *     nu.validator.checker.langDetectionSeed.
     * All readers are closed via try-with-resources.
     */
    static {
        if (!"0".equals(System.getProperty("nu.validator.checker.enableLangDetection"))) {
            LANG_TAGS_BY_TLD.put("ae", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("af", new String[]{"ps"});
            LANG_TAGS_BY_TLD.put("am", new String[]{"hy"});
            LANG_TAGS_BY_TLD.put("ar", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("at", new String[]{"de"});
            LANG_TAGS_BY_TLD.put("az", new String[]{"az"});
            LANG_TAGS_BY_TLD.put("ba", new String[]{"bs", "hr", "sr"});
            LANG_TAGS_BY_TLD.put("bd", new String[]{"bn"});
            LANG_TAGS_BY_TLD.put("be", new String[]{"de", "fr", "nl"});
            LANG_TAGS_BY_TLD.put("bg", new String[]{"bg"});
            LANG_TAGS_BY_TLD.put("bh", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("bo", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("br", new String[]{"pt"});
            LANG_TAGS_BY_TLD.put("by", new String[]{"be"});
            LANG_TAGS_BY_TLD.put("bz", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("ch", new String[]{"de", "fr", "it", "rm"});
            LANG_TAGS_BY_TLD.put("cl", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("co", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("cu", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("cr", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("cz", new String[]{"cs"});
            LANG_TAGS_BY_TLD.put("de", new String[]{"de"});
            LANG_TAGS_BY_TLD.put("dk", new String[]{"da"});
            LANG_TAGS_BY_TLD.put("do", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("ec", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("ee", new String[]{"et"});
            LANG_TAGS_BY_TLD.put("eg", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("es", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("fi", new String[]{"fi"});
            LANG_TAGS_BY_TLD.put("fr", new String[]{"fr"});
            LANG_TAGS_BY_TLD.put("ge", new String[]{"ka"});
            LANG_TAGS_BY_TLD.put("gr", new String[]{"el"});
            LANG_TAGS_BY_TLD.put("gt", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("hn", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("hr", new String[]{"hr"});
            LANG_TAGS_BY_TLD.put("hu", new String[]{"hu"});
            LANG_TAGS_BY_TLD.put("id", new String[]{"id"});
            LANG_TAGS_BY_TLD.put("is", new String[]{"is"});
            LANG_TAGS_BY_TLD.put("it", new String[]{"it"});
            // NOTE(review): "iw" is the legacy code for Hebrew, while RTL_LANGS in this
            // class uses "he" — confirm which code ULocale.getLanguage() yields here.
            LANG_TAGS_BY_TLD.put("il", new String[]{"iw"});
            LANG_TAGS_BY_TLD.put("in", new String[]{"bn", "gu", "hi", "kn", "ml", "mr", "pa", "ta", "te"});
            // Key is the TLD and value the language code; this entry was inverted ("ja" -> "jp").
            LANG_TAGS_BY_TLD.put("jp", new String[]{"ja"});
            LANG_TAGS_BY_TLD.put("jo", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("ke", new String[]{"sw"});
            LANG_TAGS_BY_TLD.put("kg", new String[]{"ky"});
            LANG_TAGS_BY_TLD.put("kh", new String[]{"km"});
            LANG_TAGS_BY_TLD.put("kr", new String[]{"ko"});
            LANG_TAGS_BY_TLD.put("kw", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("kz", new String[]{"kk"});
            LANG_TAGS_BY_TLD.put("la", new String[]{"lo"});
            LANG_TAGS_BY_TLD.put("li", new String[]{"de"});
            LANG_TAGS_BY_TLD.put("lb", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("lk", new String[]{"si", "ta"});
            LANG_TAGS_BY_TLD.put("lt", new String[]{"lt"});
            LANG_TAGS_BY_TLD.put("lu", new String[]{"de"});
            LANG_TAGS_BY_TLD.put("lv", new String[]{"lv"});
            LANG_TAGS_BY_TLD.put("md", new String[]{"mo"});
            LANG_TAGS_BY_TLD.put("mk", new String[]{"mk"});
            LANG_TAGS_BY_TLD.put("mn", new String[]{"mn"});
            LANG_TAGS_BY_TLD.put("mx", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("my", new String[]{"ms"});
            LANG_TAGS_BY_TLD.put("ni", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("nl", new String[]{"nl"});
            LANG_TAGS_BY_TLD.put("no", new String[]{"nn", "no"});
            LANG_TAGS_BY_TLD.put("np", new String[]{"ne"});
            LANG_TAGS_BY_TLD.put("pa", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("pe", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("ph", new String[]{"tl"});
            LANG_TAGS_BY_TLD.put("pl", new String[]{"pl"});
            LANG_TAGS_BY_TLD.put("pk", new String[]{"ur"});
            LANG_TAGS_BY_TLD.put("pr", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("pt", new String[]{"pt"});
            LANG_TAGS_BY_TLD.put("py", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("qa", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("ro", new String[]{"ro"});
            LANG_TAGS_BY_TLD.put("rs", new String[]{"sr"});
            LANG_TAGS_BY_TLD.put("ru", new String[]{"ru"});
            LANG_TAGS_BY_TLD.put("sa", new String[]{"ar"});
            LANG_TAGS_BY_TLD.put("se", new String[]{"sv"});
            LANG_TAGS_BY_TLD.put("si", new String[]{"sl"});
            LANG_TAGS_BY_TLD.put("sk", new String[]{"sk"});
            LANG_TAGS_BY_TLD.put("sv", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("th", new String[]{"th"});
            LANG_TAGS_BY_TLD.put("tj", new String[]{"tg"});
            LANG_TAGS_BY_TLD.put("tm", new String[]{"tk"});
            LANG_TAGS_BY_TLD.put("ua", new String[]{"uk"});
            LANG_TAGS_BY_TLD.put("uy", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("uz", new String[]{"uz"});
            LANG_TAGS_BY_TLD.put("ve", new String[]{"es"});
            LANG_TAGS_BY_TLD.put("vn", new String[]{"vi"});
            LANG_TAGS_BY_TLD.put("za", new String[]{"af"});
            try {
                // The language tag is the first tab-separated column of every line.
                ArrayList<String> languageTags = new ArrayList<String>();
                try (BufferedReader listReader = new BufferedReader(new InputStreamReader(LanguageDetectingChecker.class.getClassLoader().getResourceAsStream(languageList), "UTF-8"))) {
                    String languageTagAndName = listReader.readLine();
                    while (languageTagAndName != null) {
                        languageTags.add(languageTagAndName.split("\t")[0]);
                        languageTagAndName = listReader.readLine();
                    }
                }
                // Only the first line of each profile resource is read and handed to the factory.
                ArrayList<String> profiles = new ArrayList<String>();
                for (String languageTag : languageTags) {
                    try (BufferedReader profileReader = LanguageDetectingChecker.profileReaderForLanguageTag(languageTag)) {
                        profiles.add(profileReader.readLine());
                    }
                }
                DetectorFactory.clear();
                DetectorFactory.loadProfile(profiles);
                // The seed is optional: an absent or non-numeric property is ignored.
                String seedProperty = System.getProperty("nu.validator.checker.langDetectionSeed");
                if (seedProperty != null) {
                    try {
                        DetectorFactory.setSeed(Long.parseLong(seedProperty));
                    }
                    catch (NumberFormatException ignored) {
                        // Not a number: keep the factory's default seeding.
                    }
                }
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
            catch (LangDetectException ignored) {
                // Profile loading failures are swallowed here, as before.
                // NOTE(review): presumably detection then fails per document and is
                // swallowed there too — confirm this best-effort behaviour is intended.
            }
        }
    }
}

