package org.carrot2.source.opensearch;

import com.google.common.collect.Maps;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.fetcher.FeedFetcher;
import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.MultipageSearchEngine;
import org.carrot2.source.MultipageSearchEngineMetadata;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.util.RollingWindowAverage;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.IntRange;
import org.carrot2.util.resource.URLResourceWithParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A multipage document source that fetches search results from a feed whose URL is built
 * from a template containing search-term and paging placeholders.
 *
 * <p>Before each processing run the template is validated: it must contain the
 * {@code searchTerms} placeholder and exactly one of the {@code startIndex} /
 * {@code startPage} placeholders. Individual result pages are then fetched through a
 * shared executor and every feed entry is converted into a {@link Document}.
 */
@Bindable(prefix = "OpenSearchDocumentSource")
public class OpenSearchDocumentSource extends MultipageSearchEngine {
    static final Logger logger = LoggerFactory.getLogger(OpenSearchDocumentSource.class);

    /** Upper bound on concurrent page fetches requested from the shared executor. */
    private static final int MAX_CONCURRENT_THREADS = 10;

    /** Template variable replaced with the query string. */
    private static final String SEARCH_TERMS_VARIABLE_NAME = "searchTerms";

    /** Template variable replaced with the one-based index of the first requested result. */
    private static final String START_INDEX_VARIABLE_NAME = "startIndex";

    /** Template variable replaced with the one-based number of the requested page. */
    private static final String START_PAGE_VARIABLE_NAME = "startPage";

    /** Template variable replaced with the number of results requested for a page. */
    private static final String COUNT_VARIABLE_NAME = "count";

    /**
     * URL template of the feed. Must contain the {@code searchTerms} placeholder and
     * exactly one of the {@code startIndex} / {@code startPage} placeholders; the
     * {@code count} placeholder is substituted as well when present.
     */
    @Init
    @Processing
    @Required
    @Input
    @Attribute
    public String feedUrlTemplate;

    /** Number of results requested from the feed in a single page. */
    @Init
    @Processing
    @Required
    @Input
    @Attribute
    @IntRange(min = 1)
    public int resultsPerPage = 50;

    /**
     * Maximum number of results that will be fetched in total.
     */
    // NOTE(review): the default is expressed through RollingWindowAverage.SECOND, which
    // looks like a decompiler constant mix-up rather than an intentional dependency --
    // confirm the intended literal and inline it.
    @Init
    @Processing
    @Input
    @Attribute
    @IntRange(min = 1)
    public int maximumResults = RollingWindowAverage.SECOND;

    /**
     * Optional extra parameters appended to the expanded feed URL. Values are URL-encoded
     * as UTF-8; keys are appended verbatim.
     */
    @Init
    @Processing
    @Input
    @Attribute
    public Map<String, String> feedUrlParams = null;

    /** Optional user agent passed to the feed fetcher; ignored when blank. */
    @Init
    @Processing
    @Input
    @Attribute
    public String userAgent = null;

    /** Paging metadata derived from the attributes in {@link #beforeProcessing()}. */
    private MultipageSearchEngineMetadata metadata;

    /** Fetcher used to retrieve feed pages; re-created in {@link #beforeProcessing()}. */
    private FeedFetcher feedFetcher;

    /**
     * Validates the feed URL template and prepares the paging metadata and feed fetcher
     * for the upcoming processing run.
     *
     * @throws ProcessingException if the template does not contain exactly one of the
     *         {@code startIndex} / {@code startPage} placeholders, lacks the
     *         {@code searchTerms} placeholder, or if {@code resultsPerPage} is zero
     */
    @Override // org.carrot2.core.ProcessingComponentBase, org.carrot2.core.IProcessingComponent
    public void beforeProcessing() {
        final boolean hasStartPage = URLResourceWithParams.containsAttributePlaceholder(
            this.feedUrlTemplate, START_PAGE_VARIABLE_NAME);
        final boolean hasStartIndex = URLResourceWithParams.containsAttributePlaceholder(
            this.feedUrlTemplate, START_INDEX_VARIABLE_NAME);

        // Exactly one paging placeholder is allowed: both-present and both-absent are errors.
        if (hasStartPage == hasStartIndex) {
            throw new ProcessingException("The feedUrlTemplate must contain either "
                + URLResourceWithParams.formatAttributePlaceholder(START_INDEX_VARIABLE_NAME)
                + " or "
                + URLResourceWithParams.formatAttributePlaceholder(START_PAGE_VARIABLE_NAME)
                + " variable");
        }
        if (!URLResourceWithParams.containsAttributePlaceholder(this.feedUrlTemplate, SEARCH_TERMS_VARIABLE_NAME)) {
            throw new ProcessingException("The feedUrlTemplate must contain "
                + URLResourceWithParams.formatAttributePlaceholder(SEARCH_TERMS_VARIABLE_NAME)
                + " variable");
        }
        if (this.resultsPerPage == 0) {
            throw new ProcessingException("resultsPerPage must be set");
        }

        // The third argument tells the paging logic whether the template pages by
        // page number (startPage) rather than by result index (startIndex).
        this.metadata = new MultipageSearchEngineMetadata(this.resultsPerPage, this.maximumResults, hasStartPage);
        this.feedFetcher = new HttpURLFeedFetcher();
        if (StringUtils.isNotBlank(this.userAgent)) {
            this.feedFetcher.setUserAgent(this.userAgent);
        }
    }

    /**
     * Runs the multipage search using the metadata prepared in {@link #beforeProcessing()}
     * and an executor shared between instances of this class.
     *
     * @throws ProcessingException propagated from the multipage search
     */
    @Override // org.carrot2.core.ProcessingComponentBase, org.carrot2.core.IProcessingComponent
    public void process() throws ProcessingException {
        super.process(this.metadata, getSharedExecutor(MAX_CONCURRENT_THREADS, getClass()));
    }

    /**
     * Creates a task that fetches a single page of results from the feed.
     *
     * @param searchRange the range of results the task should fetch
     * @return a callable producing the response for the requested range
     */
    @Override // org.carrot2.source.MultipageSearchEngine
    protected Callable<SearchEngineResponse> createFetcher(final MultipageSearchEngine.SearchRange searchRange) {
        return new MultipageSearchEngine.SearchEngineResponseCallable() { // from class: org.carrot2.source.opensearch.OpenSearchDocumentSource.1
            @Override // org.carrot2.source.MultipageSearchEngine.SearchEngineResponseCallable
            public SearchEngineResponse search() throws Exception {
                final String url = OpenSearchDocumentSource.this.buildFeedUrl(searchRange);
                OpenSearchDocumentSource.logger.debug("Fetching URL: {}", url);

                final SyndFeed feed = OpenSearchDocumentSource.this.feedFetcher.retrieveFeed(new URL(url));
                final SearchEngineResponse response = new SearchEngineResponse();
                if (feed != null) {
                    // Iterate as Object and cast: this compiles whether the feed library
                    // exposes the entries as a raw or a generic list.
                    for (Object item : feed.getEntries()) {
                        response.results.add(OpenSearchDocumentSource.this.toDocument((SyndEntry) item));
                    }
                }
                return response;
            }
        };
    }

    /**
     * Expands the feed URL template for the given range and appends any extra parameters.
     *
     * @param searchRange the range of results to request
     * @return the complete URL to fetch
     */
    private String buildFeedUrl(MultipageSearchEngine.SearchRange searchRange) {
        final Map<String, Object> values = Maps.newHashMap();
        values.put(SEARCH_TERMS_VARIABLE_NAME, this.query);
        // Both paging variables are one-based, hence the +1; only one of them is present
        // in a valid template (see beforeProcessing()).
        values.put(START_INDEX_VARIABLE_NAME, Integer.valueOf(searchRange.start + 1));
        values.put(START_PAGE_VARIABLE_NAME, Integer.valueOf(searchRange.start + 1));
        values.put(COUNT_VARIABLE_NAME, Integer.valueOf(searchRange.results));

        final StringBuilder url = new StringBuilder(
            URLResourceWithParams.substituteAttributes(this.feedUrlTemplate, values));
        if (this.feedUrlParams != null) {
            // Start a query string with '?' if the expanded template does not have one
            // yet; otherwise continue the existing one with '&'.
            boolean hasQuery = url.indexOf("?") >= 0;
            for (Map.Entry<String, String> param : this.feedUrlParams.entrySet()) {
                url.append(hasQuery ? '&' : '?');
                hasQuery = true;
                url.append(param.getKey());
                url.append('=');
                url.append(org.carrot2.util.StringUtils.urlEncodeWrapException(param.getValue(), "UTF-8"));
            }
        }
        return url.toString();
    }

    /**
     * Converts a single feed entry into a document with title, summary and content URL.
     * Entries without a title or description simply yield a document without that field.
     *
     * @param entry the feed entry to convert
     * @return the resulting document
     */
    private Document toDocument(SyndEntry entry) {
        final Document document = new Document();

        final String title = clean(entry.getTitle());
        if (title != null) {
            document.setField(Document.TITLE, title);
        }

        // The description element is optional in feeds, so it may be absent here.
        final String summary = entry.getDescription() != null
            ? clean(entry.getDescription().getValue())
            : null;
        if (summary != null) {
            document.setField(Document.SUMMARY, summary);
        }

        document.setField(Document.CONTENT_URL, entry.getLink());
        return document;
    }

    /**
     * Unescapes HTML entities in the given text and then removes HTML tags from it.
     *
     * @param str the raw text, may be {@code null}
     * @return the cleaned text, or {@code null} if {@code str} was {@code null}
     */
    public String clean(String str) {
        if (str == null) {
            return null;
        }
        return org.carrot2.util.StringUtils.removeHtmlTags(StringEscapeUtils.unescapeHtml(str));
    }
}
