package org.carrot2.source.pubmed;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.SAXParserFactory;
import org.carrot2.core.Document;
import org.carrot2.core.LanguageCode;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.source.SimpleSearchEngine;
import org.carrot2.util.StringUtils;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.httpclient.HttpUtils;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

/**
 * A search engine implementation that retrieves documents from PubMed through the
 * NCBI E-utilities HTTP endpoints.
 *
 * <p>A fetch is a two-step process: the {@code esearch} endpoint is asked for the primary
 * identifiers matching the query, and the {@code efetch} endpoint is then asked for the
 * abstracts of those identifiers. Both responses are XML and are parsed with SAX.
 *
 * <p>NOTE(review): both endpoints use plain {@code http}; NCBI has announced an https-only
 * policy for E-utilities -- confirm whether these URLs still work and update if needed.
 */
@Bindable(prefix = "PubMedDocumentSource")
public class PubMedDocumentSource extends SimpleSearchEngine {
    /** URL of the E-utilities search service (returns primary identifiers). */
    public static final String E_SEARCH_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi";

    /** URL of the E-utilities fetch service (returns abstracts for given identifiers). */
    public static final String E_FETCH_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";

    /**
     * Runs the two-step PubMed lookup for the current {@code query} and {@code results}
     * fields: identifiers first, then the abstracts for those identifiers.
     *
     * @return the response built by the fetch handler, or an empty response when the
     *     search step yields no identifiers
     * @throws Exception if a parser cannot be created, an HTTP request fails or returns a
     *     non-200 status, or the XML cannot be parsed
     */
    @Override // org.carrot2.source.SimpleSearchEngine
    protected SearchEngineResponse fetchSearchResponse() throws Exception {
        List<String> primaryIds = getPubMedIds(this.query, this.results);
        return getPubMedAbstracts(primaryIds);
    }

    /**
     * Marks every document of the given response as English.
     *
     * @param searchEngineResponse the response whose documents are updated in place
     */
    @Override // org.carrot2.source.SearchEngineBase
    public void afterFetch(SearchEngineResponse searchEngineResponse) {
        for (Document document : searchEngineResponse.results) {
            document.setLanguage(LanguageCode.ENGLISH);
        }
    }

    /**
     * Queries the search endpoint and collects the primary identifiers it reports.
     *
     * @param str the query; it is URL-encoded as UTF-8 before being sent
     * @param i the value sent as the {@code retmax} request parameter
     * @return the identifiers gathered by the search handler
     * @throws Exception on parser set-up, HTTP or XML parsing failure
     */
    private List<String> getPubMedIds(String str, int i) throws Exception {
        // The reader is prepared before the request is issued, so a parser
        // configuration problem surfaces without any network traffic.
        XMLReader reader = newXmlReader();
        PubMedSearchHandler searchHandler = new PubMedSearchHandler();
        reader.setContentHandler(searchHandler);

        String url = E_SEARCH_URL
                + "?db=pubmed&usehistory=n&term=" + StringUtils.urlEncodeWrapException(str, "UTF-8")
                + "&retmax=" + i;
        fetchAndParse(url, reader);
        return searchHandler.getPubMedPrimaryIds();
    }

    /**
     * Fetches the abstracts for the given identifiers.
     *
     * @param list the primary identifiers to fetch; when empty, no request is made
     * @return the response assembled by the fetch handler, or a new empty response when
     *     {@code list} is empty
     * @throws Exception on parser set-up, HTTP or XML parsing failure
     */
    private SearchEngineResponse getPubMedAbstracts(List<String> list) throws Exception {
        if (list.isEmpty()) {
            return new SearchEngineResponse();
        }

        XMLReader reader = newXmlReader();
        PubMedFetchHandler fetchHandler = new PubMedFetchHandler();
        reader.setContentHandler(fetchHandler);

        String url = E_FETCH_URL
                + "?db=pubmed&retmode=xml&rettype=abstract&id=" + getIdsString(list);
        fetchAndParse(url, reader);
        return fetchHandler.getResponse();
    }

    /**
     * Creates a non-validating, namespace-aware SAX reader.
     *
     * <p>NOTE(review): DTD loading and external entities are not explicitly disabled here
     * although the parsed XML comes from a remote service; confirm whether XXE hardening
     * is required before tightening, since it may affect how responses are parsed.
     */
    private XMLReader newXmlReader() throws Exception {
        XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        reader.setFeature("http://xml.org/sax/features/validation", false);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        return reader;
    }

    /**
     * Issues a GET request for {@code url} and feeds the payload to {@code reader}.
     *
     * @throws IOException if the HTTP status is anything other than 200; the message
     *     carries the status and the payload decoded as ISO-8859-1
     */
    private void fetchAndParse(String url, XMLReader reader) throws Exception {
        HttpUtils.Response response = HttpUtils.doGET(url, null, null);
        if (response.status != 200) {
            throw new IOException("PubMed returned HTTP Error: " + response.status + ", HTTP payload: " + new String(response.payload, "iso8859-1"));
        }
        reader.parse(new InputSource(response.getPayloadAsStream()));
    }

    /**
     * Joins the identifiers into a single comma-separated string.
     *
     * @param list the identifiers to join
     * @return the joined string, or an empty string when {@code list} is empty
     */
    private String getIdsString(List<String> list) {
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (String id : list) {
            joined.append(separator).append(id);
            separator = ",";
        }
        return joined.toString();
    }
}
