package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ie.pascal.ISODateInstance;
import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicHeadFinder;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams.class */
public class ArabicTreebankParserParams extends AbstractTreebankParserParams {
    private static final long serialVersionUID = 8853426784197984653L;
    // Text buffer created in the constructor and seeded with "ArabicTreebankParserParams\n".
    private final StringBuilder optionsString;
    // The retain*/changeNoLabels/discardX flags below are passed to the ArabicTreeReaderFactory
    // built by treeReaderFactory(); retainNPTmp, retainNPSbj and retainPRD are additionally
    // consulted by ArabicSubcategoryStripper when it simplifies phrasal labels.
    private boolean retainNPTmp;
    private boolean retainNPSbj;
    private boolean retainPRD;
    private boolean retainPPClr;
    private boolean changeNoLabels;
    // Negated and handed to the TreeCollinizer created by collinizer().
    private boolean collinizerRetainsPunctuation;
    private boolean discardX;
    // Created on first use by headFinder() and then reused.
    private HeadFinder headFinder;
    // Maps an option name (e.g. "-genitiveMark") to a tregex pattern plus the function that
    // produces the annotation suffix for a match.
    private final Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns;
    // The pattern/function pairs that transformTree(Tree, Tree) actually applies.
    private final List<Pair<TregexPattern, Function<TregexMatcher, String>>> activeAnnotations;
    // When non-null, lex() builds a FactoredLexicon with this specification instead of a BaseLexicon.
    private MorphoFeatureSpecification morphoSpec;
    // Option names added in the constructor (starts with "-markNounNPargTakers").
    private final List<String> baselineFeatures;
    // Created empty in the constructor.
    private final List<String> additionalFeatures;
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicTreebankParserParams.class);
    // Used by transformTree(Tree, Tree) to turn a leaf's original text into the tag of its pre-terminal.
    private static final MorphoFeatureSpecification tagSpec = new ArabicMorphoFeatureSpecification();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AddEquivalencedConjNode.class */
    /**
     * Annotation function that appends the annotation mark followed by an equivalence class
     * for the node bound to {@code key} in the matcher. The node's basic category is collapsed
     * to {@code "noun"}, {@code "adj"} or {@code "vb"} when it is a member of the matching tag
     * set; any other basic category is used unchanged.
     */
    public static class AddEquivalencedConjNode implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private static final String nnTags = "DTNN DTNNP DTNNPS DTNNS NN NNP NNS NNPS";
        private static final Set<String> nnTagClass = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(nnTags.split("\\s+"))));
        private static final String jjTags = "ADJ_NUM DTJJ DTJJR JJ JJR";
        private static final Set<String> jjTagClass = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(jjTags.split("\\s+"))));
        private static final String vbTags = "VBD VBP";
        private static final Set<String> vbTagClass = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(vbTags.split("\\s+"))));
        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();
        private static final long serialVersionUID = 1;

        /**
         * @param annotationMark prefix placed in front of the equivalence class
         * @param key name of the matcher node whose category is inspected
         */
        public AddEquivalencedConjNode(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /** Returns the annotation mark followed by the equivalence class of the named node. */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            final String nodeValue = tregexMatcher.getNode(this.key).value();
            return this.annotationMark + equivalenceClass(tlp.basicCategory(nodeValue));
        }

        /** Maps a basic category onto its coarse class, or returns it unchanged if none applies. */
        private static String equivalenceClass(String category) {
            if (nnTagClass.contains(category)) {
                return "noun";
            }
            if (jjTagClass.contains(category)) {
                return "adj";
            }
            if (vbTagClass.contains(category)) {
                return "vb";
            }
            return category;
        }

        @Override
        public String toString() {
            return "AddEquivalencedConjNode[" + this.annotationMark + "," + this.key + "]";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AddEquivalencedNodeFunction.class */
    /**
     * Annotation function that looks at the label of the node bound to {@code key} and emits
     * the annotation mark followed by {@code 'S'} or {@code 'V'} when the label starts with
     * that letter. Any other label yields the empty string.
     */
    public static class AddEquivalencedNodeFunction implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private static final long serialVersionUID = 1;

        /**
         * @param annotationMark prefix placed in front of the emitted letter
         * @param key name of the matcher node whose label is inspected
         */
        public AddEquivalencedNodeFunction(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /** Returns mark+"S", mark+"V", or "" depending on the first letter of the node's label. */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            final String nodeLabel = tregexMatcher.getNode(this.key).label().value();
            // "S" is tested first, exactly as in the original chain.
            if (nodeLabel.startsWith("S")) {
                return this.annotationMark + "S";
            }
            if (nodeLabel.startsWith("V")) {
                return this.annotationMark + "V";
            }
            return "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunction[" + this.annotationMark + "," + this.key + "]";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AddEquivalencedNodeFunctionVar.class */
    /**
     * Annotation function that emits the annotation mark followed by {@code "VSA"} when the
     * label of the node bound to {@code key} starts with {@code "S"}, {@code "V"} or
     * {@code "A"}, and the empty string otherwise.
     */
    public static class AddEquivalencedNodeFunctionVar implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private static final long serialVersionUID = 1;

        /**
         * @param annotationMark prefix placed in front of {@code "VSA"}
         * @param key name of the matcher node whose label is inspected
         */
        public AddEquivalencedNodeFunctionVar(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /** Returns mark+"VSA" for labels beginning with S, V or A; otherwise "". */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            final String nodeLabel = tregexMatcher.getNode(this.key).label().value();
            // The "A" prefix is a category-label prefix, so it is written as a plain literal
            // rather than borrowed from a constant that belongs to date handling.
            final boolean equivalenced = nodeLabel.startsWith("S")
                    || nodeLabel.startsWith("V")
                    || nodeLabel.startsWith("A");
            return equivalenced ? this.annotationMark + "VSA" : "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunctionVar[" + this.annotationMark + ',' + this.key + ']';
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AddRelativeNodeFunction.class */
    /**
     * Annotation function that emits the annotation mark followed by the label of the node
     * bound to {@code key}, and, when a second key is configured, the mark and label of that
     * node as well. With {@code doBasicCat} set, each label is reduced through
     * {@code tlp.basicCategory} first.
     */
    public static class AddRelativeNodeFunction implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private String key2;
        private boolean doBasicCat;
        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();
        private static final long serialVersionUID = 1;

        /** Single-node variant; no second key is configured. */
        public AddRelativeNodeFunction(String annotationMark, String key, boolean doBasicCat) {
            this(annotationMark, key, null, doBasicCat);
        }

        /**
         * @param annotationMark prefix placed in front of every emitted label
         * @param key name of the first matcher node
         * @param key2 name of the second matcher node, or {@code null} for none
         * @param doBasicCat whether labels are reduced to their basic category
         */
        public AddRelativeNodeFunction(String annotationMark, String key, String key2, boolean doBasicCat) {
            this.annotationMark = annotationMark;
            this.key = key;
            this.key2 = key2;
            this.doBasicCat = doBasicCat;
        }

        /** Builds mark+label(key), followed by mark+label(key2) when a second key exists. */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            final String firstPart = this.annotationMark + labelOf(tregexMatcher, this.key);
            if (this.key2 == null) {
                return firstPart;
            }
            return firstPart + this.annotationMark + labelOf(tregexMatcher, this.key2);
        }

        /** Reads the label value of the named node, optionally reduced to its basic category. */
        private String labelOf(TregexMatcher tregexMatcher, String nodeName) {
            final String rawLabel = tregexMatcher.getNode(nodeName).label().value();
            return this.doBasicCat ? tlp.basicCategory(rawLabel) : rawLabel;
        }

        @Override
        public String toString() {
            final StringBuilder text = new StringBuilder("AddRelativeNodeFunction[");
            text.append(this.annotationMark).append(',').append(this.key);
            if (this.key2 != null) {
                text.append(',').append(this.key2);
            }
            return text.append(']').toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AddRelativeNodeRegexFunction.class */
    /**
     * Annotation function that emits the annotation mark followed by capture group 1 of a
     * regular expression applied to the label of the node bound to {@code key}. When the
     * expression does not match, the whole label is used. A second key/pattern pair is
     * handled the same way if both are non-null; nothing in this class assigns them.
     */
    public static class AddRelativeNodeRegexFunction implements SerializableFunction<TregexMatcher, String> {
        private String annotationMark;
        private String key;
        private Pattern pattern;
        private String key2 = null;
        private Pattern pattern2;
        private static final long serialVersionUID = 1;

        /**
         * @param annotationMark prefix placed in front of the emitted text
         * @param key name of the matcher node whose label is inspected
         * @param regex expression whose first capture group selects the emitted text
         * @throws IllegalArgumentException if {@code regex} is not a valid pattern
         */
        public AddRelativeNodeRegexFunction(String annotationMark, String key, String regex) {
            this.annotationMark = annotationMark;
            this.key = key;
            try {
                this.pattern = Pattern.compile(regex);
            } catch (PatternSyntaxException badSyntax) {
                // The pattern field is still null here because the assignment never completed.
                ArabicTreebankParserParams.log.info("Bad pattern: " + regex);
                throw new IllegalArgumentException(badSyntax);
            }
        }

        /** Returns mark + selected text of the first node (+ mark + selected text of the second). */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            String selected = tregexMatcher.getNode(this.key).label().value();
            if (this.pattern != null) {
                selected = firstGroupOrSelf(this.pattern, selected);
            }
            if (this.key2 != null && this.pattern2 != null) {
                final String secondLabel = tregexMatcher.getNode(this.key2).label().value();
                selected = selected + this.annotationMark + firstGroupOrSelf(this.pattern2, secondLabel);
            }
            return this.annotationMark + selected;
        }

        /** Returns capture group 1 of the first match of {@code regex} in {@code text}, else {@code text}. */
        private static String firstGroupOrSelf(Pattern regex, String text) {
            final Matcher found = regex.matcher(text);
            return found.find() ? found.group(1) : text;
        }

        @Override
        public String toString() {
            return "AddRelativeNodeRegexFunction[" + this.annotationMark + "," + this.key + "," + this.pattern + "]";
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$AnnotatePunctuationFunction2.class */
    /**
     * Annotation function that maps the value of the node bound to {@code "term"} onto a
     * punctuation-class suffix: {@code -fs}, {@code -quest}, {@code -comma}, {@code -colon},
     * {@code -lrb}, {@code -rrb}, {@code -plus}, {@code -dash} or {@code -quote}. Any other
     * value yields the empty string.
     */
    public static class AnnotatePunctuationFunction2 implements SerializableFunction<TregexMatcher, String> {
        static final String key = "term";
        private static final Pattern quote = Pattern.compile("^\"$");
        private static final long serialVersionUID = 1;

        private AnnotatePunctuationFunction2() {
        }

        /** Returns the suffix for the punctuation token, or "" when the token is not recognised. */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            final String token = tregexMatcher.getNode(key).value();
            switch (token) {
                case ".":
                    return "-fs";
                case "?":
                    return "-quest";
                case ",":
                    return "-comma";
                // The colon is a punctuation literal here, so it is spelled out directly
                // instead of borrowing a delimiter constant from the morphology classes.
                case ":":
                case ";":
                    return "-colon";
                case "(":
                case "-LRB-":
                    return "-lrb";
                case ")":
                case "-RRB-":
                    return "-rrb";
                case "-PLUS-":
                    return "-plus";
                case "-":
                    return "-dash";
                default:
                    return quote.matcher(token).matches() ? "-quote" : "";
            }
        }

        @Override
        public String toString() {
            return "AnnotatePunctuationFunction2";
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$ArabicSubcategoryStripper.class */
    /**
     * Tree transformer that rebuilds a tree with simplified node labels. Leaves are copied
     * with their label and score. Every other node is rebuilt from its transformed children,
     * with its value (and tag, when the label supports one) set to the simplified category.
     */
    protected class ArabicSubcategoryStripper implements TreeTransformer {
        protected final TreeFactory tf = new LabeledScoredTreeFactory();

        protected ArabicSubcategoryStripper() {
        }

        /** Returns a new tree mirroring {@code tree} with simplified category labels. */
        @Override
        public Tree transformTree(Tree tree) {
            final Label nodeLabel = tree.label();
            final String nodeValue = nodeLabel.value();

            if (tree.isLeaf()) {
                final Tree leafCopy = this.tf.newLeaf(nodeLabel);
                leafCopy.setScore(tree.score());
                return leafCopy;
            }

            final String simplified = simplifiedCategory(tree, nodeValue);

            final List<Tree> newChildren = new ArrayList<>(tree.numChildren());
            for (Tree child : tree.getChildrenAsList()) {
                newChildren.add(transformTree(child));
            }

            final Tree rebuilt = this.tf.newTreeNode(nodeLabel, newChildren);
            rebuilt.setValue(simplified);
            rebuilt.setScore(tree.score());
            if (rebuilt.label() instanceof HasTag) {
                ((HasTag) rebuilt.label()).setTag(simplified);
            }
            return rebuilt;
        }

        /**
         * Chooses the label for a non-leaf node. Phrasal nodes may keep an NP-TMP, NP-SBJ or
         * -PRD marking when the corresponding retain flag of the enclosing instance is set;
         * everything else is reduced to its basic category.
         */
        private String simplifiedCategory(Tree tree, String nodeValue) {
            if (tree.isPhrasal()) {
                if (ArabicTreebankParserParams.this.retainNPTmp && nodeValue.startsWith("NP-TMP")) {
                    return "NP-TMP";
                }
                if (ArabicTreebankParserParams.this.retainNPSbj && nodeValue.startsWith("NP-SBJ")) {
                    return "NP-SBJ";
                }
                if (ArabicTreebankParserParams.this.retainPRD && nodeValue.matches("VB[^P].*PRD.*")) {
                    return ArabicTreebankParserParams.this.tlp.basicCategory(nodeValue) + "-PRD";
                }
                return ArabicTreebankParserParams.this.tlp.basicCategory(nodeValue);
            }
            if (!tree.isPreTerminal()) {
                // Neither leaf, phrasal nor pre-terminal: report it, then fall back to the basic category.
                System.err.printf("Encountered a non-leaf/phrasal/pre-terminal node %s\n", nodeValue);
            }
            return ArabicTreebankParserParams.this.tlp.basicCategory(nodeValue);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ArabicTreebankParserParams$SimpleStringFunction.class */
    /**
     * Annotation function that ignores the matcher and always returns the string it was
     * constructed with.
     */
    public static class SimpleStringFunction implements SerializableFunction<TregexMatcher, String> {
        private String result;
        private static final long serialVersionUID = 1;

        /** @param result the string returned by every call to {@link #apply} */
        public SimpleStringFunction(String result) {
            this.result = result;
        }

        /** Returns the configured string; the matcher argument is not consulted. */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            return this.result;
        }

        @Override
        public String toString() {
            return "SimpleStringFunction[" + this.result + "]";
        }
    }

    /**
     * Creates parser parameters backed by an {@link ArabicTreebankLanguagePack}. All retain
     * and discard flags start out false, the baseline feature list is filled with its option
     * names, and the annotation pattern table is populated.
     */
    public ArabicTreebankParserParams() {
        super(new ArabicTreebankLanguagePack());

        // Option flags all start disabled.
        this.retainNPTmp = false;
        this.retainNPSbj = false;
        this.retainPRD = false;
        this.retainPPClr = false;
        this.changeNoLabels = false;
        this.collinizerRetainsPunctuation = false;
        this.discardX = false;
        this.morphoSpec = null;

        this.baselineFeatures = new ArrayList<>();
        Collections.addAll(this.baselineFeatures,
                "-markNounNPargTakers",
                "-genitiveMark",
                "-splitPUNC",
                "-markContainsVerb",
                "-markStrictBaseNP",
                "-markOneLevelIdafa",
                "-splitIN",
                "-markMasdarVP",
                "-containsSVO",
                "-splitCC",
                "-markFem",
                "-mwe",
                "-mweContainsVerb");
        this.additionalFeatures = new ArrayList<>();

        this.optionsString = new StringBuilder();
        this.optionsString.append("ArabicTreebankParserParams\n");

        // The pattern table must exist before initializeAnnotationPatterns() fills it.
        this.annotationPatterns = Generics.newHashMap();
        this.activeAnnotations = new ArrayList<>();
        this.headFinder = headFinder();
        initializeAnnotationPatterns();
    }

    /**
     * Builds a new {@link ArabicTreeReaderFactory} from the current option flags. The sixth
     * constructor argument is always passed as {@code false}.
     */
    @Override
    public TreeReaderFactory treeReaderFactory() {
        final TreeReaderFactory readerFactory = new ArabicTreeReaderFactory(
                this.retainNPTmp,
                this.retainPRD,
                this.changeNoLabels,
                this.discardX,
                this.retainNPSbj,
                false,
                this.retainPPClr);
        return readerFactory;
    }

    /** Returns a new {@link MemoryTreebank} using {@link #treeReaderFactory()} and the input encoding. */
    @Override
    public MemoryTreebank memoryTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new MemoryTreebank(readerFactory, this.inputEncoding);
    }

    /** Returns a new {@link DiskTreebank} using {@link #treeReaderFactory()} and the input encoding. */
    @Override
    public DiskTreebank diskTreebank() {
        final TreeReaderFactory readerFactory = treeReaderFactory();
        return new DiskTreebank(readerFactory, this.inputEncoding);
    }

    /**
     * Returns the head finder for this instance, creating an {@link ArabicHeadFinder} from
     * {@code treebankLanguagePack()} the first time it is requested.
     */
    @Override
    public HeadFinder headFinder() {
        if (this.headFinder != null) {
            return this.headFinder;
        }
        this.headFinder = new ArabicHeadFinder(treebankLanguagePack());
        return this.headFinder;
    }

    /** Returns the same head finder as {@link #headFinder()}. */
    @Override
    public HeadFinder typedDependencyHeadFinder() {
        final HeadFinder sharedFinder = headFinder();
        return sharedFinder;
    }

    /**
     * Creates the lexicon. If {@code options.lexOptions.uwModelTrainer} is unset it is first
     * set to the Arabic unknown-word model trainer class name. A {@link FactoredLexicon} is
     * returned when a morphological specification is configured, otherwise a
     * {@link BaseLexicon}.
     */
    @Override
    public Lexicon lex(Options options, Index<String> index, Index<String> index2) {
        if (options.lexOptions.uwModelTrainer == null) {
            options.lexOptions.uwModelTrainer = "edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModelTrainer";
        }
        if (this.morphoSpec == null) {
            return new BaseLexicon(options, index, index2);
        }
        return new FactoredLexicon(options, this.morphoSpec, index, index2);
    }

    /** Returns a fixed six-token Arabic sentence as a word list. */
    @Override
    public List<? extends HasWord> defaultTestSentence() {
        final String[] tokens = {"هو", "استنكر", "الحكومة", "يوم", "امس", "."};
        return SentenceUtils.toWordList(tokens);
    }

    /** Returns a new {@link ArabicSubcategoryStripper} bound to this instance. */
    @Override
    public TreeTransformer subcategoryStripper() {
        final TreeTransformer stripper = this.new ArabicSubcategoryStripper();
        return stripper;
    }

    /**
     * Returns a new {@link TreeCollinizer} for this language pack. Its second argument is the
     * negation of {@code collinizerRetainsPunctuation} and its third is always {@code false}.
     */
    @Override
    public TreeTransformer collinizer() {
        final boolean notRetainingPunctuation = !this.collinizerRetainsPunctuation;
        return new TreeCollinizer(this.tlp, notRetainingPunctuation, false);
    }

    /** Returns the same kind of transformer as {@link #collinizer()}. */
    @Override
    public TreeTransformer collinizerEvalb() {
        final TreeTransformer sameAsCollinizer = collinizer();
        return sameAsCollinizer;
    }

    /** Returns the shared empty string array. */
    @Override
    public String[] sisterSplitters() {
        final String[] noSplitters = StringUtils.EMPTY_STRING_ARRAY;
        return noSplitters;
    }

    /**
     * Relabels {@code tree} in place and returns it. Every active annotation pattern is
     * matched against {@code tree2} at the node {@code tree}; the strings produced by the
     * matching annotation functions are concatenated into a suffix. For a pre-terminal, the
     * base tag is first replaced by the tag that {@code tagSpec} derives from the original
     * text of its leaf. The new value is base + suffix, and a pre-terminal whose label
     * supports tags receives it as its tag too.
     *
     * @throws RuntimeException if a pre-terminal's leaf label is not a {@link CoreLabel} or
     *     has no original text
     */
    @Override
    public Tree transformTree(Tree tree, Tree tree2) {
        String baseCategory = tree.value();

        final StringBuilder suffix = new StringBuilder();
        for (Pair<TregexPattern, Function<TregexMatcher, String>> annotation : this.activeAnnotations) {
            final TregexMatcher matcher = annotation.first().matcher(tree2);
            if (!matcher.matchesAt(tree)) {
                continue;
            }
            suffix.append(annotation.second().apply(matcher));
        }

        if (tree.isPreTerminal() && tagSpec != null) {
            final Label leafLabel = tree.firstChild().label();
            final String morphAnalysis =
                    (leafLabel instanceof CoreLabel) ? ((CoreLabel) leafLabel).originalText() : null;
            if (morphAnalysis == null) {
                throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s", getClass().getName(), tree.toString()));
            }
            baseCategory = tagSpec.strToFeatures(morphAnalysis).getTag(baseCategory);
        }

        final String annotated = baseCategory + suffix;
        tree.setValue(annotated);
        if (tree.isPreTerminal()) {
            final Label treeLabel = tree.label();
            if (treeLabel instanceof HasTag) {
                ((HasTag) treeLabel).setTag(annotated);
            }
        }
        return tree;
    }

    private void initializeAnnotationPatterns() {
        TregexPatternCompiler tregexPatternCompiler = new TregexPatternCompiler(headFinder());
        try {
            this.annotationPatterns.put("-genitiveMark", new Pair<>(TregexPattern.compile("@NP > @NP $- /^N/"), new SimpleStringFunction("-genitive")));
            this.annotationPatterns.put("-markStrictBaseNP", new Pair<>(tregexPatternCompiler.compile("@NP !< (__ < (__ < __))"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markOneLevelIdafa", new Pair<>(tregexPatternCompiler.compile("@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-idafa1")));
            this.annotationPatterns.put("-markNounNPargTakers", new Pair<>(tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|DTNN|DTNNS|DTNNP|DTNNPS ># (@NP < @NP)"), new SimpleStringFunction("-NounNParg")));
            this.annotationPatterns.put("-markContainsVerb", new Pair<>(tregexPatternCompiler.compile("__ << (/^[CIP]?V/ < (__ !< __))"), new SimpleStringFunction("-withV")));
            this.annotationPatterns.put("-splitIN", new Pair<>(tregexPatternCompiler.compile("@IN < __=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-splitPUNC", new Pair<>(tregexPatternCompiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction2()));
            this.annotationPatterns.put("-markMasdarVP", new Pair<>(tregexPatternCompiler.compile("@VP|MWVP < /VBG|VN/"), new SimpleStringFunction("-masdar")));
            this.annotationPatterns.put("-containsSVO", new Pair<>(tregexPatternCompiler.compile("__ << (@S < (@NP . @VP|MWVP))"), new SimpleStringFunction("-hasSVO")));
            this.annotationPatterns.put("-splitCC", new Pair<>(tregexPatternCompiler.compile("@CC|CONJ . __=term , __"), new AddEquivalencedConjNode("-", "term")));
            this.annotationPatterns.put("-markFem", new Pair<>(tregexPatternCompiler.compile("__ < /ة$/"), new SimpleStringFunction("-fem")));
            this.annotationPatterns.put("-mwe", new Pair<>(tregexPatternCompiler.compile("__ > /MW/=tag"), new AddRelativeNodeFunction("-", "tag", true)));
            this.annotationPatterns.put("-mweContainsVerb", new Pair<>(tregexPatternCompiler.compile("__ << @MWVP"), new SimpleStringFunction("-withV")));
            this.annotationPatterns.put("-splitPUNC2", new Pair<>(tregexPatternCompiler.compile("@PUNC < __=punc"), new AbstractTreebankParserParams.AnnotatePunctuationFunction("-", "punc")));
            this.annotationPatterns.put("-tagPAar", new Pair<>(tregexPatternCompiler.compile("!@PUNC < (__ !< __) > __=parent"), new AddRelativeNodeFunction("-", "parent", true)));
            this.annotationPatterns.put("-splitCC1", new Pair<>(tregexPatternCompiler.compile("@CC|CONJ < __=term"), new AddRelativeNodeRegexFunction("-", "term", "-*([^-].*)")));
            this.annotationPatterns.put("-splitCC2", new Pair<>(tregexPatternCompiler.compile("@CC . __=term , __"), new AddRelativeNodeFunction("-", "term", true)));
            this.annotationPatterns.put("-idafaJJ1", new Pair<>(tregexPatternCompiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP"), new SimpleStringFunction("-idafaJJ")));
            this.annotationPatterns.put("-idafaJJ2", new Pair<>(tregexPatternCompiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP !<< @SBAR"), new SimpleStringFunction("-idafaJJ")));
            this.annotationPatterns.put("-properBaseNP", new Pair<>(tregexPatternCompiler.compile("@NP !<< @NP < /NNP/ !< @PUNC|CD"), new SimpleStringFunction("-prop")));
            this.annotationPatterns.put("-interrog", new Pair<>(tregexPatternCompiler.compile("__ << هل|ماذا|لماذا|اين|متى"), new SimpleStringFunction("-inter")));
            this.annotationPatterns.put("-splitPseudo", new Pair<>(tregexPatternCompiler.compile("@NN < مع|بعد|بين"), new SimpleStringFunction("-pseudo")));
            this.annotationPatterns.put("-nPseudo", new Pair<>(tregexPatternCompiler.compile("@NP < (@NN < مع|بعد|بين)"), new SimpleStringFunction("-npseudo")));
            this.annotationPatterns.put("-pseudoArg", new Pair<>(tregexPatternCompiler.compile("@NP < @NP $, (@NN < مع|بعد|بين)"), new SimpleStringFunction("-pseudoArg")));
            this.annotationPatterns.put("-eqL1", new Pair<>(tregexPatternCompiler.compile("__ < (@S !< @VP|S)"), new SimpleStringFunction("-haseq")));
            this.annotationPatterns.put("-eqL1L2", new Pair<>(tregexPatternCompiler.compile("__ < (__ < (@S !< @VP|S)) | < (@S !< @VP|S)"), new SimpleStringFunction("-haseq")));
            this.annotationPatterns.put("-fullQuote", new Pair<>(tregexPatternCompiler.compile("__ < ((@PUNC < \") $ (@PUNC < \"))"), new SimpleStringFunction("-fq")));
            this.annotationPatterns.put("-brokeQuote", new Pair<>(tregexPatternCompiler.compile("__ < ((@PUNC < \") !$ (@PUNC < \"))"), new SimpleStringFunction("-bq")));
            this.annotationPatterns.put("-splitVP", new Pair<>(tregexPatternCompiler.compile("@VP <# __=term1"), new AddRelativeNodeFunction("-", "term1", true)));
            this.annotationPatterns.put("-markFemP", new Pair<>(tregexPatternCompiler.compile("@NP|ADJP < (__ < /ة$/)"), new SimpleStringFunction("-femP")));
            this.annotationPatterns.put("-embedSBAR", new Pair<>(tregexPatternCompiler.compile("@NP|PP <+(@NP|PP) @SBAR"), new SimpleStringFunction("-embedSBAR")));
            this.annotationPatterns.put("-complexVP", new Pair<>(tregexPatternCompiler.compile("__ << (@VP < (@NP $ @NP)) > __"), new SimpleStringFunction("-complexVP")));
            this.annotationPatterns.put("-containsJJ", new Pair<>(tregexPatternCompiler.compile("@NP <+(@NP) /JJ/"), new SimpleStringFunction("-hasJJ")));
            this.annotationPatterns.put("-markMasdarVP2", new Pair<>(tregexPatternCompiler.compile("__ << @VN|VBG"), new SimpleStringFunction("-masdar")));
            this.annotationPatterns.put("-coordNP", new Pair<>(tregexPatternCompiler.compile("@NP|ADJP <+(@NP|ADJP) (@CC|PUNC $- __ $+ __)"), new SimpleStringFunction("-coordNP")));
            this.annotationPatterns.put("-coordWa", new Pair<>(tregexPatternCompiler.compile("__ << (@CC , __ < و-)"), new SimpleStringFunction("-coordWA")));
            this.annotationPatterns.put("-NPhasADJP", new Pair<>(tregexPatternCompiler.compile("@NP <+(@NP) @ADJP"), new SimpleStringFunction("-NPhasADJP")));
            this.annotationPatterns.put("-NPADJP", new Pair<>(tregexPatternCompiler.compile("@NP < @ADJP"), new SimpleStringFunction("-npadj")));
            this.annotationPatterns.put("-NPJJ", new Pair<>(tregexPatternCompiler.compile("@NP < /JJ/"), new SimpleStringFunction("-npjj")));
            this.annotationPatterns.put("-NPCC", new Pair<>(tregexPatternCompiler.compile("@NP <+(@NP) @CC"), new SimpleStringFunction("-npcc")));
            this.annotationPatterns.put("-NPCD", new Pair<>(tregexPatternCompiler.compile("@NP < @CD"), new SimpleStringFunction("-npcd")));
            this.annotationPatterns.put("-NPNNP", new Pair<>(tregexPatternCompiler.compile("@NP < /NNP/"), new SimpleStringFunction("-npnnp")));
            this.annotationPatterns.put("-SVO", new Pair<>(tregexPatternCompiler.compile("@S < (@NP . @VP)"), new SimpleStringFunction("-svo")));
            this.annotationPatterns.put("-containsSBAR", new Pair<>(tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-hasSBAR")));
            this.annotationPatterns.put("-markGappedVP", new Pair<>(TregexPattern.compile("@VP > @VP $- __ $ /^(?:CC|CONJ)/ !< /^V/"), new SimpleStringFunction("-gappedVP")));
            this.annotationPatterns.put("-markGappedVPConjoiners", new Pair<>(TregexPattern.compile("/^(?:CC|CONJ)/ $ (@VP > @VP $- __ !< /^V/)"), new SimpleStringFunction("-gappedVP")));
            this.annotationPatterns.put("-markGenitiveParent", new Pair<>(TregexPattern.compile("@NP < (@NP > @NP $- /^N/)"), new SimpleStringFunction("-genitiveParent")));
            this.annotationPatterns.put("-maSdrMark", new Pair<>(tregexPatternCompiler.compile("/^N/ <<# (/^[t\\u062a].+[y\\u064a].$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark2", new Pair<>(tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a].+[y\\u064a].|<.{3,}|A.{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark3", new Pair<>(tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark4", new Pair<>(tregexPatternCompiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark5", new Pair<>(tregexPatternCompiler.compile("/^N/ <<# (__ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-mjjMark", new Pair<>(tregexPatternCompiler.compile("@JJ|DTJJ < /^m/ $+ @PP ># @ADJP "), new SimpleStringFunction("-mjj")));
            this.annotationPatterns.put("-markNPwithSdescendant", new Pair<>(tregexPatternCompiler.compile("__ !< @S << @S [ >> @NP | == @NP ]"), new SimpleStringFunction("-inNPdominatesS")));
            this.annotationPatterns.put("-markRightRecursiveNP", new Pair<>(tregexPatternCompiler.compile("__ <<- @NP [>>- @NP | == @NP]"), new SimpleStringFunction("-rrNP")));
            this.annotationPatterns.put("-markBaseNP", new Pair<>(tregexPatternCompiler.compile("@NP !< @NP !< @VP !< @SBAR !< @ADJP !< @ADVP !< @S !< @QP !< @UCP !< @PP"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markBaseNPplusIdafa", new Pair<>(tregexPatternCompiler.compile("@NP !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markTwoLevelIdafa", new Pair<>(tregexPatternCompiler.compile("@NP < (@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __))) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < (__ < __))))"), new SimpleStringFunction("-idafa2")));
            this.annotationPatterns.put("-markDefiniteIdafa", new Pair<>(tregexPatternCompiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) <+(/^NP/) (@NP < /^DT/)"), new SimpleStringFunction("-defIdafa")));
            this.annotationPatterns.put("-markDefiniteIdafa1", new Pair<>(tregexPatternCompiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) < (@NP < /^DT/) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-defIdafa1")));
            this.annotationPatterns.put("-markContainsSBAR", new Pair<>(tregexPatternCompiler.compile("__ << @SBAR"), new SimpleStringFunction("-withSBAR")));
            this.annotationPatterns.put("-markPhrasalNodesDominatedBySBAR", new Pair<>(tregexPatternCompiler.compile("__ < (__ < __) >> @SBAR"), new SimpleStringFunction("-domBySBAR")));
            this.annotationPatterns.put("-markCoordinateNPs", new Pair<>(tregexPatternCompiler.compile("@NP < @CC|CONJ"), new SimpleStringFunction("-coord")));
            this.annotationPatterns.put("-markNounAdjVPheads", new Pair<>(tregexPatternCompiler.compile("@NN|NNS|NNP|NNPS|JJ|DTJJ|DTNN|DTNNS|DTNNP|DTNNPS ># @VP"), new SimpleStringFunction("-VHead")));
            this.annotationPatterns.put("-markPronominalNP", new Pair<>(tregexPatternCompiler.compile("@NP < @PRP"), new SimpleStringFunction("-PRP")));
            this.annotationPatterns.put("-markMultiCC", new Pair<>(tregexPatternCompiler.compile("__ < (@CC $.. @CC)"), new SimpleStringFunction("-multiCC")));
            this.annotationPatterns.put("-markHasCCdaughter", new Pair<>(tregexPatternCompiler.compile("__ < @CC"), new SimpleStringFunction("-CCdtr")));
            this.annotationPatterns.put("-markAcronymNP", new Pair<>(tregexPatternCompiler.compile("@NP !<  (__ < (__ < __)) < (/^NN/ < /^.$/ $ (/^NN/ < /^.$/)) !< (__ < /../)"), new SimpleStringFunction("-acro")));
            this.annotationPatterns.put("-markAcronymNN", new Pair<>(tregexPatternCompiler.compile("/^NN/ < /^.$/ $ (/^NN/ < /^.$/) > (@NP !<  (__ < (__ < __)) !< (__ < /../))"), new SimpleStringFunction("-acro")));
            this.annotationPatterns.put("-markPPwithPPdescendant", new Pair<>(tregexPatternCompiler.compile("__ !< @PP << @PP [ >> @PP | == @PP ]"), new SimpleStringFunction("-inPPdominatesPP")));
            this.annotationPatterns.put("-gpAnnotatePrepositions", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (__ > __=gp)"), new AddRelativeNodeFunction("^^", "gp", false)));
            this.annotationPatterns.put("-gpEquivalencePrepositions", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunction("^^", "gp")));
            this.annotationPatterns.put("-gpEquivalencePrepositionsVar", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunctionVar("^^", "gp")));
            this.annotationPatterns.put("-markPPParent", new Pair<>(tregexPatternCompiler.compile("@PP=max !< @PP"), new AddRelativeNodeRegexFunction("^^", "max", "^(\\w)")));
            this.annotationPatterns.put("-whPP", new Pair<>(tregexPatternCompiler.compile("@PP <- (@SBAR <, /^WH/)"), new SimpleStringFunction("-whPP")));
            this.annotationPatterns.put("-deflateMin", new Pair<>(tregexPatternCompiler.compile("__ < (__ < من)"), new SimpleStringFunction("-min")));
            this.annotationPatterns.put("-v2MarkovIN", new Pair<>(tregexPatternCompiler.compile("@IN > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false)));
            this.annotationPatterns.put("-pleonasticMin", new Pair<>(tregexPatternCompiler.compile("@PP <, (IN < من) > @S"), new SimpleStringFunction("-pleo")));
            this.annotationPatterns.put("-v2MarkovPP", new Pair<>(tregexPatternCompiler.compile("@PP > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false)));
        } catch (TregexParseException e) {
            int size = this.annotationPatterns.size() + 1;
            log.info("Parse exception on " + (size == 1 ? "1st" : size == 2 ? "2nd" : size + "th") + " annotation pattern initialization:" + e);
            throw e;
        }
    }

    /**
     * Installs a new head finder and rebuilds the list of active annotations.
     *
     * <p>After storing the head finder, {@code initializeAnnotationPatterns()} is invoked so the
     * pattern table is rebuilt, and the active annotation list is emptied and refilled with the
     * entries for the baseline features followed by the entries for the additional features.
     *
     * @param headFinder the head finder to install
     * @throws IllegalArgumentException if {@code headFinder} is {@code null}
     */
    private void setHeadFinder(HeadFinder headFinder) {
        if (headFinder == null) {
            throw new IllegalArgumentException();
        }
        this.headFinder = headFinder;

        // The pattern table must be rebuilt before features are looked up in it below.
        initializeAnnotationPatterns();

        this.activeAnnotations.clear();
        for (String baselineKey : this.baselineFeatures) {
            this.activeAnnotations.add(this.annotationPatterns.get(baselineKey));
        }
        for (String additionalKey : this.additionalFeatures) {
            this.activeAnnotations.add(this.annotationPatterns.get(additionalKey));
        }
    }

    /**
     * Activates the morphological features named in a comma-separated list.
     *
     * <p>{@code morphoSpec} is replaced with the specification returned by
     * {@code tlp.morphFeatureSpec()}, and each listed feature is then activated on it.
     *
     * @param str comma-separated names, each resolved (after trimming) through
     *     {@code MorphoFeatureSpecification.MorphoFeatureType.valueOf}
     * @return the string form of the resulting specification
     */
    private String setupMorphoFeatures(String str) {
        final String[] featureNames = str.split(",");
        this.morphoSpec = this.tlp.morphFeatureSpec();

        for (int k = 0; k < featureNames.length; k++) {
            final String trimmedName = featureNames[k].trim();
            final MorphoFeatureSpecification.MorphoFeatureType featureType =
                    MorphoFeatureSpecification.MorphoFeatureType.valueOf(trimmedName);
            this.morphoSpec.activate(featureType);
        }

        return this.morphoSpec.toString();
    }

    /**
     * Drops a feature from the baseline set and deactivates its annotation.
     *
     * <p>Does nothing when the feature is not currently a baseline feature.
     *
     * @param str the option key of the feature to remove
     */
    private void removeBaselineFeature(String str) {
        if (!this.baselineFeatures.contains(str)) {
            return;
        }
        this.baselineFeatures.remove(str);
        // Also drop the matching pattern/function pair from the active list.
        this.activeAnnotations.remove(this.annotationPatterns.get(str));
    }

    /** Logs, at info level, the option descriptions accumulated in {@code optionsString}. */
    @Override
    public void display() {
        final String optionSummary = this.optionsString.toString();
        log.info(optionSummary);
    }

    /**
     * Processes the command-line option found at index {@code i} of {@code strArr}.
     *
     * <p>An option is recognized when it is either a key of the annotation pattern table or one
     * of the fixed flags handled below. {@code -headFinder} (matched case-insensitively) and
     * {@code -factlex} each consume one following argument and are only recognized when that
     * argument is present. A description of every recognized option is appended to
     * {@code optionsString}, one per line.
     *
     * @param strArr the full argument array
     * @param i index of the option to examine
     * @return {@code i} unchanged if the option was not recognized; otherwise the index just past
     *     the option and any argument it consumed
     */
    @Override
    public int setOptionFlag(String[] strArr, int i) {
        final String flag = strArr[i];
        final boolean hasValue = i + 1 < strArr.length;
        boolean recognized = true;

        if (this.annotationPatterns.containsKey(flag)) {
            if (!this.baselineFeatures.contains(flag)) {
                this.additionalFeatures.add(flag);
            }
            Pair<TregexPattern, Function<TregexMatcher, String>> pair = this.annotationPatterns.get(flag);
            this.activeAnnotations.add(pair);
            this.optionsString
                    .append("Option ").append(flag)
                    .append(" added annotation pattern ").append(pair.first())
                    .append(" with annotation ").append(pair.second())
                    .append('\n');
        } else if (flag.equals("-retainNPTmp")) {
            this.optionsString.append("Retaining NP-TMP marking.\n");
            this.retainNPTmp = true;
        } else if (flag.equals("-retainNPSbj")) {
            this.optionsString.append("Retaining NP-SBJ dash tag.\n");
            this.retainNPSbj = true;
        } else if (flag.equals("-retainPPClr")) {
            this.optionsString.append("Retaining PP-CLR dash tag.\n");
            this.retainPPClr = true;
        } else if (flag.equals("-discardX")) {
            this.optionsString.append("Discarding X trees.\n");
            this.discardX = true;
        } else if (flag.equals("-changeNoLabels")) {
            this.optionsString.append("Change no labels.\n");
            this.changeNoLabels = true;
        } else if (flag.equals("-markPRDverbs")) {
            this.optionsString.append("Mark PRD.\n");
            this.retainPRD = true;
        } else if (flag.equals("-collinizerRetainsPunctuation")) {
            this.optionsString.append("Collinizer retains punctuation.\n");
            this.collinizerRetainsPunctuation = true;
        } else if (flag.equals("-arabicFactored")) {
            // Enable every baseline feature by feeding it back through this method.
            for (String baselineFeature : this.baselineFeatures) {
                setOptionFlag(new String[]{baselineFeature}, 0);
            }
        } else if (flag.equalsIgnoreCase("-headFinder") && hasValue) {
            final String headFinderClass = strArr[i + 1];
            try {
                setHeadFinder((HeadFinder) Class.forName(headFinderClass).getDeclaredConstructor().newInstance());
                this.optionsString.append("HeadFinder: ").append(headFinderClass).append("\n");
            } catch (Exception e) {
                // Best effort: a head finder that cannot be loaded is reported, and the option
                // (with its argument) is still treated as consumed.
                log.info(e);
                log.info(getClass().getName() + ": Could not load head finder " + headFinderClass);
            }
            i++;
        } else if (flag.equals("-factlex") && hasValue) {
            i++;
            // Terminate the line like every other option message so that subsequent
            // descriptions do not run onto the same line when displayed.
            this.optionsString
                    .append("Factored Lexicon: active features: ")
                    .append(setupMorphoFeatures(strArr[i]))
                    .append('\n');
        } else if (flag.equals("-noFeatures")) {
            this.activeAnnotations.clear();
            this.optionsString.append("Removed all manual features.\n");
        } else {
            recognized = false;
        }

        return recognized ? i + 1 : i;
    }

    /**
     * Command-line driver: loads a treebank from the single path argument, applies
     * {@code transformTree} to every node of every tree with the {@code -arabicFactored} option
     * enabled, and prints each tree to standard output.
     *
     * <p>If the argument count is not exactly one, a usage line is written to standard error and
     * the JVM exits with status -1.
     *
     * @param strArr command-line arguments; exactly one path is expected
     */
    public static void main(String[] strArr) {
        if (strArr.length != 1) {
            // Report why we are quitting instead of exiting silently.
            System.err.println("Usage: java " + ArabicTreebankParserParams.class.getName() + " treebank_path");
            System.exit(-1);
        }
        ArabicTreebankParserParams params = new ArabicTreebankParserParams();
        params.setOptionFlag(new String[]{"-arabicFactored"}, 0);

        DiskTreebank treebank = params.diskTreebank();
        treebank.loadPath(strArr[0], "txt", false);

        for (Tree tree : treebank) {
            // The return value of transformTree is ignored here, as in the original code;
            // the tree printed below is the same object that was iterated.
            for (Tree subtree : tree) {
                params.transformTree(subtree, tree);
            }
            System.out.println(tree);
        }
    }

    static {
        tagSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NGEN);
    }
}
