package org.cleartk.util.treebank;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.util.XMLConstants;

/* loaded from: input_file:WEB-INF/lib/cleartk-util-2.0.0.jar:org/cleartk/util/treebank/TreebankFormatParser.class */
/**
 * Static helpers for reading Penn-Treebank-style bracketed parses, e.g.
 * {@code (S (NP (DT the) (NN dog)) (VP (VBZ barks)))}, into
 * {@link TreebankNode} trees and for aligning their tokens with plain text.
 *
 * <p>All methods are static and the class keeps no mutable state.
 */
public class TreebankFormatParser {
    /** Regex for a leaf: {@code (LABEL value)}; group 1 is the label, group 2 the value. */
    public static final String LEAF_NODE_REGEX = "\\(([^( )]+) ([^( )]+)\\s*\\)";
    private static final Pattern leafNodePattern = Pattern.compile(LEAF_NODE_REGEX);
    /** Regex capturing (group 1) the label that follows the opening parenthesis of a node. */
    public static final String TYPE_REGEX = "^\\(([^() ]+)";
    private static final Pattern typePattern = Pattern.compile(TYPE_REGEX);
    /** Any run of whitespace; collapsed to a single space by {@link #prepareString(String)}. */
    public static final String cleanUPRegex1 = "\\s+";
    private static final Pattern cleanUpPattern1 = Pattern.compile(cleanUPRegex1, Pattern.MULTILINE);
    /** Two opening parentheses separated by one space. */
    public static final String cleanUPRegex2 = "\\( \\(";
    private static final Pattern cleanUpPattern2 = Pattern.compile(cleanUPRegex2, Pattern.MULTILINE);
    /** Two closing parentheses separated by one space. */
    public static final String cleanUPRegex3 = "\\) \\)";
    private static final Pattern cleanUpPattern3 = Pattern.compile(cleanUPRegex3, Pattern.MULTILINE);
    /** An unlabeled outer node, i.e. two opening parentheses with optional whitespace around them. */
    public static final String cleanUPRegex4 = "\\s*\\(\\s*\\(";
    private static final Pattern cleanUpPattern4 = Pattern.compile(cleanUPRegex4, Pattern.MULTILINE);
    private static final Pattern nonwhiteSpaceCharPattern = Pattern.compile("[^\\s]");

    /** Tokens that are attached directly to the preceding token when plain text is inferred. */
    private static final String[] NO_SPACE_BEFORE = {".", ",", ":", ";", "?", "'s", "'t", "\"", "!", ")", "]"};
    /** Tokens that are attached directly to the following token when plain text is inferred. */
    private static final String[] NO_SPACE_AFTER = {"\"", "(", "["};

    /**
     * Builds a leaf node from a string that is, in its entirety, a leaf of the
     * form {@code (LABEL value)}.
     *
     * @param str candidate leaf text
     * @return a node flagged as a leaf with type, tags, value and token text
     *         set, or {@code null} when {@code str} does not match
     *         {@link #LEAF_NODE_REGEX} as a whole
     */
    public static TreebankNode getLeafNode(String str) {
        Matcher matcher = leafNodePattern.matcher(str);
        if (!matcher.matches()) {
            return null;
        }
        String label = matcher.group(1);
        String value = matcher.group(2);
        TreebankNode leaf = new TreebankNode();
        leaf.setType(getTypeFromType(label));
        leaf.setTags(getTagsFromType(label));
        leaf.setValue(value);
        leaf.setLeaf(true);
        leaf.setText(getToken(leaf.getValue(), leaf.getType()));
        return leaf;
    }

    /**
     * Extracts the base type from a node label such as {@code NP-SBJ=1}.
     *
     * <p>A label that starts with '-' (for example {@code -NONE-}) keeps
     * everything up to and including its second '-'. Any other label is cut at
     * the first '-' or '='. A label that offers no base type under those rules
     * (a dash-led label without a closing dash, or a label consisting only of
     * separators) is returned unchanged instead of yielding an empty type or
     * an array-index failure.
     */
    private static String getTypeFromType(String str) {
        if (str.startsWith("-")) {
            int closingDash = str.indexOf('-', 1);
            return closingDash < 0 ? str : str.substring(0, closingDash + 1);
        }
        String[] parts = str.split("[-=]");
        // split() drops trailing empty strings, so a label such as "=" yields no parts at all.
        return parts.length == 0 ? str : parts[0];
    }

    /**
     * Extracts the tags that follow the base type in a node label, e.g.
     * {@code {"SBJ", "1"}} for {@code NP-SBJ=1}.
     *
     * @return the tags in label order; an empty array when there are none
     */
    private static String[] getTagsFromType(String str) {
        if (str.startsWith("-")) {
            int closingDash = str.indexOf('-', 1);
            if (closingDash < 0) {
                // The whole label is the type (see getTypeFromType), so nothing is left for tags.
                return new String[0];
            }
            String rest = str.substring(closingDash + 1);
            return rest.length() > 0 ? rest.split("[-=]") : new String[0];
        }
        String[] parts = str.split("[-=]");
        if (parts.length <= 1) {
            return new String[0];
        }
        String[] tags = new String[parts.length - 1];
        System.arraycopy(parts, 1, tags, 0, tags.length);
        return tags;
    }

    /**
     * Returns the raw label (type plus any tags) of the node that starts at
     * the beginning of {@code str}.
     *
     * @param str text beginning with the opening parenthesis of a node
     * @return the label, or {@code null} when {@code str} does not begin with
     *         an opening parenthesis immediately followed by a label
     */
    public static String getType(String str) {
        Matcher matcher = typePattern.matcher(str);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Normalizes a bracketed parse: collapses every whitespace run to one
     * space, removes the single space between adjacent opening and between
     * adjacent closing parentheses, labels the first unlabeled outer node
     * {@code TOP}, and trims the result.
     *
     * @param str raw treebank text
     * @return the normalized parse
     */
    public static String prepareString(String str) {
        String normalized = cleanUpPattern1.matcher(str).replaceAll(" ");
        normalized = cleanUpPattern2.matcher(normalized).replaceAll("((");
        normalized = cleanUpPattern3.matcher(normalized).replaceAll("))");
        // Only the first unlabeled node is given the TOP label.
        normalized = cleanUpPattern4.matcher(normalized).replaceFirst("(TOP (");
        return normalized.trim();
    }

    /**
     * Reconstructs an approximation of the original text from the leaves of
     * one or more parses. Tokens are separated by single spaces except around
     * the punctuation listed in {@code NO_SPACE_BEFORE} and
     * {@code NO_SPACE_AFTER}; leaves with empty token text are skipped.
     * Sentences (as produced by {@link #splitSentences(String)}) are separated
     * by a newline and the overall result is trimmed.
     *
     * @param str treebank text containing one or more sentences
     * @return the inferred plain text
     * @throws IllegalArgumentException if {@link #splitSentences(String)} rejects the input
     */
    public static String inferPlainText(String str) {
        StringBuilder plainText = new StringBuilder();
        for (String sentence : splitSentences(str)) {
            Matcher leafMatcher = leafNodePattern.matcher(sentence);
            while (leafMatcher.find()) {
                String token = getLeafNode(leafMatcher.group()).getText();
                if (token == null || token.length() == 0) {
                    continue;
                }
                int lastIndex = plainText.length() - 1;
                // Pull closing punctuation back onto the previous token.
                if (lastIndex > 0 && !needsSpaceBefore(token) && plainText.charAt(lastIndex) == ' ') {
                    plainText.deleteCharAt(lastIndex);
                }
                plainText.append(token);
                if (needsSpaceAfter(token)) {
                    plainText.append(' ');
                }
            }
            // Replace the separator left after the sentence's last token with a line break.
            int lastIndex = plainText.length() - 1;
            if (lastIndex >= 0 && plainText.charAt(lastIndex) == ' ') {
                plainText.deleteCharAt(lastIndex);
            }
            plainText.append('\n');
        }
        return plainText.toString().trim();
    }

    /** Returns {@code false} for tokens that attach directly to the preceding token. */
    private static boolean needsSpaceBefore(String str) {
        for (String attached : NO_SPACE_BEFORE) {
            if (str.equals(attached)) {
                return false;
            }
        }
        return true;
    }

    /** Returns {@code false} for tokens that attach directly to the following token. */
    private static boolean needsSpaceAfter(String str) {
        for (String attached : NO_SPACE_AFTER) {
            if (str.equals(attached)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Parses a single sentence, aligning it against plain text inferred from
     * its own leaves.
     *
     * @param str one treebank sentence
     * @return the root of the parsed tree
     * @throws IllegalArgumentException if the sentence cannot be parsed
     */
    public static TopTreebankNode parse(String str) {
        String prepared = prepareString(str);
        // inferPlainText already returns trimmed text.
        return parse(prepared, inferPlainText(prepared), 0);
    }

    /**
     * Verifies that the node's text equals the span of {@code str} selected by
     * the node's text offsets.
     *
     * <p>One mismatch is tolerated: the node text ends with '.' and differs
     * from the plain text only in its final character. In that case the node's
     * text end is moved back by one so the span excludes that character.
     *
     * @throws IllegalArgumentException on any other mismatch
     */
    private static void checkText(TreebankNode treebankNode, String str) {
        String text = treebankNode.getText();
        String covered = str.substring(treebankNode.getTextBegin(), treebankNode.getTextEnd());
        if (text.equals(covered)) {
            return;
        }
        // endsWith(".") guarantees text is non-empty; covered is guarded explicitly so that a
        // misalignment is reported with the message below rather than as an index error.
        boolean differsOnlyInFinalChar = text.endsWith(".")
                && !covered.isEmpty()
                && text.substring(0, text.length() - 1).equals(covered.substring(0, covered.length() - 1));
        if (!differsOnlyInFinalChar) {
            throw new IllegalArgumentException("plain text does not align with tokens in treebank parse.  node text = '" + text + "'  plain text = '" + covered + "'");
        }
        treebankNode.setTextEnd(treebankNode.getTextEnd() - 1);
    }

    /**
     * Parses a single sentence and aligns each node with {@code str2}.
     *
     * <p>The parse is scanned once. Every '(' records the current parse offset
     * and the amount of text consumed so far; every ')' completes either a
     * leaf (whose token text is consumed from the plain text) or an inner node
     * (which adopts all completed nodes that begin after its own opening
     * parenthesis). The node closed by the last ')' of the parse is the
     * returned top node.
     *
     * @param str  treebank text of one sentence; normalized with {@link #prepareString(String)}
     * @param str2 plain text the tokens are aligned with. A {@code null} value is only
     *             tolerated while no node is completed; completing any node dereferences it.
     * @param i    offset in {@code str2} at which this sentence's text starts
     * @return the root of the parsed tree
     * @throws IllegalArgumentException wrapping any runtime failure raised while parsing,
     *         including unbalanced parentheses and text that does not align
     */
    public static TopTreebankNode parse(String str, String str2, int i) {
        try {
            TopTreebankNode topNode = new TopTreebankNode();
            str = prepareString(str);
            int lastCloseParen = str.lastIndexOf(')');
            // Mirrors the plain text consumed so far; inner nodes take their text from it.
            StringBuilder consumedText = new StringBuilder();
            if (str2 != null) {
                consumedText.append(str2.substring(0, movePastWhiteSpaceChars(str2, i)));
            }
            Stack<Integer> parseOffsets = new Stack<Integer>();
            Stack<Integer> textOffsets = new Stack<Integer>();
            Stack<TreebankNode> completedNodes = new Stack<TreebankNode>();
            for (int pos = 0; pos < str.length(); pos++) {
                char c = str.charAt(pos);
                if (c == '(') {
                    parseOffsets.push(pos);
                    textOffsets.push(consumedText.length());
                    continue;
                }
                if (c != ')') {
                    continue;
                }
                int parseBegin = parseOffsets.pop();
                int parseEnd = pos + 1;
                String subParse = str.substring(parseBegin, parseEnd);
                int textOffset = textOffsets.pop();
                TreebankNode leaf = getLeafNode(subParse);
                if (leaf != null) {
                    leaf.setTopNode(topNode);
                    leaf.setParseBegin(parseBegin);
                    leaf.setParseEnd(parseEnd);
                    String token = leaf.getText();
                    if (token.length() > 0) {
                        // Copy the whitespace in front of the token, then the token itself.
                        int tokenBegin = movePastWhiteSpaceChars(str2, textOffset);
                        consumedText.append(str2.substring(textOffset, tokenBegin));
                        consumedText.append(token);
                        leaf.setTextBegin(tokenBegin);
                        leaf.setTextEnd(tokenBegin + token.length());
                    } else {
                        // Leaves without token text occupy an empty span.
                        leaf.setTextBegin(textOffset);
                        leaf.setTextEnd(textOffset);
                    }
                    checkText(leaf, str2);
                    completedNodes.push(leaf);
                } else {
                    TreebankNode node = pos == lastCloseParen ? topNode : new TreebankNode();
                    node.setTopNode(topNode);
                    node.setParseBegin(parseBegin);
                    node.setParseEnd(parseEnd);
                    String label = getType(subParse);
                    node.setType(getTypeFromType(label));
                    node.setTags(getTagsFromType(label));
                    node.setLeaf(false);
                    // Everything completed after this node was opened is one of its children.
                    while (!completedNodes.isEmpty() && completedNodes.peek().getParseBegin() > node.getParseBegin()) {
                        TreebankNode child = completedNodes.pop();
                        node.addChild(child);
                        child.setParent(node);
                    }
                    int nodeTextBegin = movePastWhiteSpaceChars(str2, textOffset);
                    node.setTextBegin(nodeTextBegin);
                    node.setTextEnd(Math.max(nodeTextBegin, consumedText.length()));
                    int begin = node.getTextBegin();
                    int end = node.getTextEnd();
                    // A node covering no consumed text (e.g. one holding only empty leaves) can
                    // point past the consumed text; it gets empty text in that case.
                    boolean withinConsumed = begin >= 0 && begin <= end && end <= consumedText.length();
                    node.setText(withinConsumed ? consumedText.substring(begin, end) : "");
                    checkText(node, str2);
                    completedNodes.push(node);
                }
            }
            topNode.setTreebankParse(str);
            topNode.initTerminalNodes();
            return topNode;
        } catch (RuntimeException e) {
            throw new IllegalArgumentException("exception thrown when parsing the following: " + str, e);
        }
    }

    /**
     * Finds the first non-whitespace character at or after {@code i}.
     *
     * @param str text to scan
     * @param i   index to start scanning from
     * @return the index of that character, or {@code i} itself when only
     *         whitespace (or nothing) follows
     * @throws IndexOutOfBoundsException if {@code i} is negative or greater than the length of {@code str}
     */
    public static int movePastWhiteSpaceChars(String str, int i) {
        Matcher matcher = nonwhiteSpaceCharPattern.matcher(str);
        return matcher.find(i) ? matcher.start() : i;
    }

    /**
     * Converts a treebank leaf value into the token it stands for: bracket
     * escapes such as {@code -LRB-} become the bracket, paired backquotes or
     * apostrophes become a double quote, and an escaped slash becomes a plain
     * slash. Leaves of type {@code -NONE-} have no token text.
     */
    private static String getToken(String str, String str2) {
        String token = str.replace("-RCB-", "}")
                .replace("-LCB-", "{")
                .replace("-RRB-", ")")
                .replace("-LRB-", "(")
                .replace("-RSB-", "]")
                .replace("-LSB-", "[")
                .replace("``", "\"")
                .replace("''", "\"");
        if (str2.equals("-NONE-")) {
            return "";
        }
        return token.replace("\\/", "/");
    }

    /**
     * Splits treebank text into sentences.
     *
     * <p>If the text contains more than one piece when cut in front of every
     * unlabeled outer node (two opening parentheses, optionally separated by
     * whitespace), those pieces are the sentences. Otherwise the text is
     * treated as one sentence per line and every line must contain as many
     * opening as closing parentheses.
     *
     * @param str treebank text
     * @return the sentences in document order
     * @throws IllegalArgumentException if, in line-per-sentence mode, a line's
     *         parenthesis counts differ
     */
    public static String[] splitSentences(String str) {
        String[] chunks = str.split("(?=\\(\\s*\\()");
        if (chunks.length > 1) {
            if (!chunks[0].trim().equals("")) {
                return chunks;
            }
            // Drop the blank piece that precedes the first sentence.
            String[] sentences = new String[chunks.length - 1];
            System.arraycopy(chunks, 1, sentences, 0, sentences.length);
            return sentences;
        }
        String[] lines = str.split("\r?\n");
        for (String line : lines) {
            if (!parensMatch(line)) {
                throw new IllegalArgumentException("Parentheses counts do not match for treebank sentence: " + line);
            }
        }
        return lines;
    }

    /**
     * Reports whether {@code str} contains the same number of opening and
     * closing parentheses. Only the counts are compared, not the nesting.
     */
    public static boolean parensMatch(String str) {
        int balance = 0;
        for (int idx = 0; idx < str.length(); idx++) {
            char c = str.charAt(idx);
            if (c == '(') {
                balance++;
            } else if (c == ')') {
                balance--;
            }
        }
        return balance == 0;
    }

    /**
     * Parses every sentence of a document against one shared plain text. Each
     * sentence starts aligning at the text end of the previous sentence's top
     * node.
     *
     * @param str  treebank text containing one or more sentences
     * @param i    offset in {@code str2} at which the first sentence starts
     * @param str2 plain text of the document
     * @return the top nodes in document order
     * @throws IllegalArgumentException if the text cannot be split or a sentence cannot be parsed
     */
    public static List<TopTreebankNode> parseDocument(String str, int i, String str2) {
        List<TopTreebankNode> topNodes = new ArrayList<TopTreebankNode>();
        int textOffset = i;
        for (String sentence : splitSentences(str)) {
            TopTreebankNode topNode = parse(sentence, str2, textOffset);
            textOffset = topNode.getTextEnd();
            topNodes.add(topNode);
        }
        return topNodes;
    }
}
