/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.ShortArrayList;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.Init;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.ExceptionUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;

/**
 * Splits the configured text fields of every document in a
 * {@link PreprocessingContext} into tokens and stores the result in
 * {@code context.allTokens} as four parallel arrays (image, type, field index and
 * document index). Entry {@code i} of each array describes the same token, because
 * {@link #add(int, byte, char[], short)} always appends to all four buffers at once.
 * <p>
 * Besides regular tokens the output contains synthetic entries with a {@code null}
 * image: a separator between two non-empty fields of one document, a separator
 * between consecutive documents and a single terminator at the very end.
 * <p>
 * The instance keeps temporary buffers in fields while {@link #tokenize} runs, so a
 * single instance must not be used by several threads at the same time.
 */
@Bindable(prefix="Tokenizer")
public final class Tokenizer {
    /** Upper bound on the number of fields enforced by {@link #tokenize}. */
    private static final int MAX_FIELDS = 8;

    /** Value of {@code ITokenizer.nextToken()} that ends the token loop for a field. */
    private static final short END_OF_STREAM = -1;

    /** Token type code stored for the separator between two documents. */
    private static final short DOCUMENT_SEPARATOR_TYPE = 512;

    /** Token type code stored for the separator between two fields of a document. */
    private static final short FIELD_SEPARATOR_TYPE = 1024;

    /** Token type code stored for the final terminator entry. */
    private static final short TERMINATOR_TYPE = 2048;

    /** Document index / field index stored when an entry has no such owner. */
    private static final int NO_INDEX = -1;

    /**
     * Names of the document fields to tokenize, in the order in which they are
     * processed. The position of a name in this collection becomes the field index
     * of its tokens. At most 8 fields are accepted by {@link #tokenize}.
     */
    @Init
    @Input
    @Attribute
    public Collection<String> documentFields = Arrays.asList("title", "snippet");

    /* Temporary buffers, non-null only while tokenize() is running. */
    private ArrayList<char[]> images;
    private ShortArrayList tokenTypes;
    private IntArrayList documentIndices;
    private ByteArrayList fieldIndices;

    /**
     * Tokenizes all documents of the given context and fills
     * {@code context.allTokens} and {@code context.allFields.name}.
     * <p>
     * Fields that are empty or produce no tokens are skipped entirely; a field
     * separator is emitted only between two fields of the same document that both
     * produced tokens. A document separator is emitted after every document except
     * the last one, and one terminator entry closes the sequence.
     *
     * @param context preprocessing context supplying the documents and the language
     *        tokenizer, and receiving the results
     * @throws ProcessingException if more than 8 fields are configured
     * @throws RuntimeException wrapping any {@link IOException} raised by the
     *         tokenizer
     */
    public void tokenize(PreprocessingContext context) {
        final List<Document> documents = context.documents;
        final String[] fieldNames =
            this.documentFields.toArray(new String[this.documentFields.size()]);
        if (fieldNames.length > MAX_FIELDS) {
            throw new ProcessingException(
                "Maximum number of tokenized fields is " + MAX_FIELDS + ".");
        }

        this.images = Lists.newArrayList();
        this.tokenTypes = new ShortArrayList();
        this.documentIndices = new IntArrayList();
        this.fieldIndices = new ByteArrayList();
        try {
            final ITokenizer ts = context.language.getTokenizer();
            // Reused for every token; context.intern() receives it and returns the
            // char[] that is actually stored.
            final MutableCharArray wrapper =
                new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);

            final Iterator<Document> docIterator = documents.iterator();
            int documentIndex = 0;
            while (docIterator.hasNext()) {
                final Document doc = docIterator.next();
                tokenizeDocument(context, ts, wrapper, doc, documentIndex, fieldNames);
                if (docIterator.hasNext()) {
                    this.addDocumentSeparator();
                }
                ++documentIndex;
            }
            this.addTerminator();

            context.allTokens.documentIndex = this.documentIndices.toArray();
            context.allTokens.fieldIndex = this.fieldIndices.toArray();
            // Typed toArray overload: yields char[][] directly, no cast required.
            context.allTokens.image =
                this.images.toArray(new char[this.images.size()][]);
            context.allTokens.type = this.tokenTypes.toArray();
            context.allFields.name = fieldNames;
        }
        finally {
            // Release the buffers even when tokenization fails, so that a failed
            // run does not keep partially filled lists reachable from this object.
            this.images = null;
            this.fieldIndices = null;
            this.tokenTypes = null;
            this.documentIndices = null;
        }
    }

    /**
     * Tokenizes every configured field of a single document, appending the tokens
     * (and field separators between non-empty fields) to the temporary buffers.
     */
    private void tokenizeDocument(PreprocessingContext context, ITokenizer ts,
        MutableCharArray wrapper, Document doc, int documentIndex, String[] fieldNames) {
        boolean hadTokens = false;
        for (int i = 0; i < fieldNames.length; ++i) {
            // Safe narrowing: tokenize() rejects more than MAX_FIELDS fields.
            final byte fieldIndex = (byte)i;
            final String fieldValue = (String)doc.getField(fieldNames[i]);
            if (StringUtils.isEmpty(fieldValue)) {
                continue;
            }
            try {
                ts.reset(new StringReader(fieldValue));
                short tokenType = ts.nextToken();
                if (tokenType == END_OF_STREAM) {
                    // Non-empty text that yields no tokens is treated like an
                    // empty field: no separator, no entries.
                    continue;
                }
                if (hadTokens) {
                    this.addFieldSeparator(documentIndex);
                }
                do {
                    ts.setTermBuffer(wrapper);
                    this.add(documentIndex, fieldIndex, context.intern(wrapper), tokenType);
                } while ((tokenType = ts.nextToken()) != END_OF_STREAM);
                hadTokens = true;
            }
            catch (IOException e) {
                throw ExceptionUtils.wrapAsRuntimeException(e);
            }
        }
    }

    /**
     * Appends the terminator entry (type 2048, no image, document and field index
     * of -1).
     */
    void addTerminator() {
        this.add(NO_INDEX, (byte)NO_INDEX, null, TERMINATOR_TYPE);
    }

    /**
     * Appends a document separator entry (type 512, no image, document and field
     * index of -1).
     */
    void addDocumentSeparator() {
        this.add(NO_INDEX, (byte)NO_INDEX, null, DOCUMENT_SEPARATOR_TYPE);
    }

    /**
     * Appends a field separator entry (type 1024, no image, field index of -1) that
     * belongs to the given document.
     *
     * @param documentIndex index of the document the separator belongs to
     */
    void addFieldSeparator(int documentIndex) {
        this.add(documentIndex, (byte)NO_INDEX, null, FIELD_SEPARATOR_TYPE);
    }

    /**
     * Appends a separator entry with no image for the given document and field.
     * <p>
     * NOTE(review): this stores the same type code (1024) as
     * {@link #addFieldSeparator(int)}; confirm whether a distinct sentence
     * separator code was intended. The method is not called within this class.
     *
     * @param documentIndex index of the document the separator belongs to
     * @param fieldIndex index of the field the separator belongs to
     */
    void addSentenceSeparator(int documentIndex, byte fieldIndex) {
        this.add(documentIndex, fieldIndex, null, FIELD_SEPARATOR_TYPE);
    }

    /**
     * Appends one entry to all four temporary buffers, keeping them aligned.
     * Must only be called while {@link #tokenize} is running, because the buffers
     * are {@code null} otherwise.
     *
     * @param documentIndex index of the owning document, or -1
     * @param fieldIndex index of the owning field, or -1
     * @param image token text, or {@code null} for separators and the terminator
     * @param tokenTypeCode token type code to store
     */
    void add(int documentIndex, byte fieldIndex, char[] image, short tokenTypeCode) {
        this.documentIndices.add(documentIndex);
        this.fieldIndices.add(fieldIndex);
        this.images.add(image);
        this.tokenTypes.add(tokenTypeCode);
    }
}

