package ru.ifmo.genetics.tools.ec;

import it.unimi.dsi.fastutil.longs.LongCollection;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import ru.ifmo.genetics.structures.map.ArrayLong2LongHashMap;
import ru.ifmo.genetics.tools.io.LazyLongReader;
import ru.ifmo.genetics.utils.FileUtils;
import ru.ifmo.genetics.utils.KmerUtils;
import ru.ifmo.genetics.utils.Misc;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.IntParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.LongParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.StringParameterBuilder;
import ru.ifmo.genetics.utils.tool.values.InMemoryValue;
import ru.ifmo.genetics.utils.tool.values.InValue;

/* loaded from: input_file:ru/ifmo/genetics/tools/ec/Cleaner.class */
/**
 * Error-correction tool.
 *
 * <p>A run loads the k-mers stored in the {@code .good} files of the k-mers directory
 * (together with their reverse complements) into a hash set, hands the {@code .bad} files
 * to a {@link CleanDispatcher}, lets a pool of {@link CleanWorker} threads compute fixes,
 * and finally writes every fix as a pair of longs (key, value) to
 * {@code <output-dir>/<prefix>.fixes}.
 *
 * <p>K-mer files are named {@code <kmers-dir>/kmers<prefix><suffix>} and are read as a
 * plain sequence of big-endian 8-byte longs.
 */
public class Cleaner extends Tool {
    public static final String NAME = "cleaner";
    public static final String DESCRIPTION = "performs error correction";

    /** Load factor used when estimating the hash set size for the good k-mers. */
    private static final float HASH_LOAD_FACTOR = 0.75f;

    /** Estimated size (2^30) from which the "big" hash set implementation is used. */
    private static final float BIG_SET_THRESHOLD = 1.0737418E9f;

    // Parameters are registered through field initializers so that both constructors
    // share one definition; the registration order matches the declaration order.
    public final Parameter<Integer> k = addParameter(new IntParameterBuilder("k")
            .mandatory()
            .withShortOpt("k")
            .withDescription("k")
            .create());

    public final Parameter<Integer> maximalIndelsNumber = addParameter(new IntParameterBuilder("maximal-indels-number")
            .mandatory()
            .withDescription("maximal indels number")
            .create());

    public final Parameter<Integer> maximalSubsNumber = addParameter(new IntParameterBuilder("maximal-subs-number")
            .mandatory()
            .withDescription("maximal substitutions number")
            .create());

    public final Parameter<String> prefixParameter = addParameter(new StringParameterBuilder("prefix")
            .withDefaultValue("")
            .withDescription("prefix")
            .create());

    // NOTE(review): the raw InValue casts are kept from the original; the declared type of
    // workDir.append(...) is not visible in this file, so they are not tightened here.
    public final Parameter<File> kmersDir = addParameter(new FileParameterBuilder("kmers-dir")
            .withDefaultValue((InValue) this.workDir.append("kmers"))
            .withDescription("directory with good and bad kmers files")
            .create());

    public final Parameter<File> outputDir = addParameter(new FileParameterBuilder("output-dir")
            .withDefaultValue((InValue) this.workDir.append("fixes"))
            .withShortOpt("o")
            .withDescription("directory for fixes")
            .create());

    public final Parameter<Long> badKmersNumber = addParameter(new LongParameterBuilder("bad-kmers-number")
            .mandatory()
            .withDescription("the number of bad kmers")
            .create());

    /** Backing value of {@link #fixesFilesOut}; set once the fixes file has been written. */
    private final InMemoryValue<File[]> fixesFilesOutValue = new InMemoryValue<>();

    /** Output: the fixes file(s) produced by the last run. */
    public InValue<File[]> fixesFilesOut = addOutput("fixes-files", this.fixesFilesOutValue, File[].class);

    // Run-time settings, filled in by runImpl() from the parameters above.
    /** k-mer length (value of {@link #k}). */
    int LEN;
    /** Mask with the lowest {@code 2 * LEN} bits set (all 64 bits when {@code 2 * LEN >= 64}). */
    long MASK;
    /** Range size handed to the {@link CleanDispatcher}. */
    int DISPATCH_WORK_RANGE_SIZE = 1024;
    /** Number of worker threads started by one run. */
    int CLEAN_WORK_THREADS_NUMBER;
    int MAXIMAL_SUBS_NUMBER;
    int MAXIMAL_INDELS_NUMBER;
    /** File-name prefix of the k-mer files to process (value of {@link #prefixParameter}). */
    String prefix;
    /** Directory holding the k-mer files (value of {@link #kmersDir}). */
    File DIR;
    /** Value of {@link #badKmersNumber}; passed on to the dispatcher. */
    long bad;

    /** Creates the tool under the default name {@link #NAME}. */
    public Cleaner() {
        super(NAME, DESCRIPTION);
    }

    /**
     * Creates the tool under the name {@code "cleaner_" + str}.
     *
     * @param str suffix appended to the tool name
     */
    public Cleaner(String str) {
        super("cleaner_" + str, DESCRIPTION);
    }

    public static void main(String[] strArr) {
        new Cleaner().mainImpl(strArr);
    }

    /**
     * Copies the parameter values into the run-time fields, makes sure the output directory
     * exists and runs the correction.
     *
     * @throws ExecutionFailedException if the output directory cannot be created, on any
     *         I/O failure, or if the run is interrupted
     */
    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void runImpl() throws ExecutionFailedException {
        this.LEN = this.k.get().intValue();
        // The shift must be done on a long: an int shift overflows as soon as 2 * LEN >= 31,
        // and a long shift by 64 or more wraps around, hence the explicit all-ones case.
        int maskBits = 2 * this.LEN;
        this.MASK = maskBits >= Long.SIZE ? -1L : (1L << maskBits) - 1;
        this.CLEAN_WORK_THREADS_NUMBER = this.availableProcessors.get().intValue();
        this.MAXIMAL_INDELS_NUMBER = this.maximalIndelsNumber.get().intValue();
        this.MAXIMAL_SUBS_NUMBER = this.maximalSubsNumber.get().intValue();
        this.prefix = this.prefixParameter.get();
        this.DIR = this.kmersDir.get();
        this.bad = this.badKmersNumber.get().longValue();

        // Fail before the (long) correction starts rather than when the fixes are dumped.
        File fixesDir = this.outputDir.get();
        if (!fixesDir.isDirectory() && !fixesDir.mkdirs()) {
            throw new ExecutionFailedException(
                    new IOException("Can't create output directory " + fixesDir));
        }

        try {
            runAndGetResults(this.bad);
        } catch (IOException e) {
            throw new ExecutionFailedException(e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new ExecutionFailedException(e);
        }
    }

    /** Nothing to release: this tool keeps no state between runs that needs cleaning. */
    @Override // ru.ifmo.genetics.utils.tool.Tool
    protected void clean() {
    }

    /**
     * Starts {@link #CLEAN_WORK_THREADS_NUMBER} workers over the given reader, waits for all
     * of them to count down the latch and collects one result map per worker.
     *
     * <p>If the waiting thread is interrupted, the workers are interrupted too, the interrupt
     * flag of the current thread is restored, and whatever results the workers hold at that
     * moment are still collected and returned.
     *
     * @param goodKmers  set of good k-mers handed to every worker
     * @param badKmers   reader over the bad k-mers, handed to the dispatcher
     * @param badNumber  number of bad k-mers, handed to the dispatcher
     * @return one result map per worker, in worker order
     */
    private ArrayLong2LongHashMap[] clean(LongSet goodKmers, LazyLongReader badKmers, long badNumber)
            throws FileNotFoundException, EOFException {
        CleanDispatcher dispatcher = new CleanDispatcher(
                badKmers, this.DISPATCH_WORK_RANGE_SIZE, badNumber, this.LEN, this.CLEAN_WORK_THREADS_NUMBER);
        CleanWorker[] workers = new CleanWorker[this.CLEAN_WORK_THREADS_NUMBER];
        CountDownLatch latch = new CountDownLatch(workers.length);
        for (int i = 0; i < workers.length; i++) {
            workers[i] = new CleanWorker(
                    dispatcher, goodKmers, this.LEN, latch, this.MAXIMAL_SUBS_NUMBER, this.MAXIMAL_INDELS_NUMBER);
            new Thread(workers[i]).start();
        }

        try {
            latch.await();
        } catch (InterruptedException e) {
            warn("Main thread interrupted");
            for (CleanWorker worker : workers) {
                worker.interrupt();
            }
            // Keep the interruption visible to the callers instead of silently dropping it.
            Thread.currentThread().interrupt();
        }
        Tool.destroyProgressBar();

        ArrayLong2LongHashMap[] results = new ArrayLong2LongHashMap[workers.length];
        for (int i = 0; i < workers.length; i++) {
            results[i] = workers[i].getResults();
        }
        return results;
    }

    /**
     * Returns the number of k-mers stored in the given files, computed as the total file
     * size divided by 8 (one long per k-mer).
     *
     * @param str  file-name suffix, e.g. {@code ".good"}
     * @param list file-name prefixes; {@code null} means the single empty prefix
     * @return total size of the files in bytes, divided by 8
     * @throws IOException if the file sizes cannot be determined
     */
    public long kmersNumberInFiles(String str, List<String> list) throws IOException {
        List<String> fileNames = kmersFileNames(this.DIR.getAbsolutePath(), list, str);
        return FileUtils.filesSizeByNames(fileNames) / 8;
    }

    /**
     * Reads every long from the given k-mer files and adds it, together with its reverse
     * complement, to {@code longCollection}.
     *
     * @param str            file-name suffix, e.g. {@code ".good"}
     * @param list           file-name prefixes; {@code null} means the single empty prefix
     * @param longCollection collection that receives the k-mers
     * @throws IOException if a file cannot be opened or read
     */
    public void loadKMers(String str, List<String> list, LongCollection longCollection) throws IOException {
        for (String fileName : kmersFileNames(String.valueOf(this.DIR), list, str)) {
            info("loading k-mers from " + fileName);
            // One stream per file, closed even when reading fails.
            try (DataInputStream in = new DataInputStream(
                    new BufferedInputStream(new FileInputStream(fileName)))) {
                while (true) {
                    long kmer = in.readLong();
                    longCollection.add(kmer);
                    longCollection.add(KmerUtils.reverseComplement(kmer, this.LEN));
                }
            } catch (EOFException ignored) {
                // Regular end of the file: readLong() signals it with EOFException.
            }
        }
    }

    /**
     * Builds the k-mer file names {@code <dir>/kmers<prefix><suffix>} for all prefixes.
     *
     * @param dir      directory part of the names
     * @param prefixes file-name prefixes; {@code null} means the single empty prefix
     * @param suffix   file-name suffix
     */
    private static List<String> kmersFileNames(String dir, List<String> prefixes, String suffix) {
        List<String> effectivePrefixes = prefixes != null ? prefixes : Arrays.asList("");
        List<String> fileNames = new ArrayList<>(effectivePrefixes.size());
        for (String filePrefix : effectivePrefixes) {
            fileNames.add(dir + File.separator + "kmers" + filePrefix + suffix);
        }
        return fileNames;
    }

    /**
     * Performs one complete run: loads the good k-mers, corrects the bad ones and dumps the
     * fixes to {@code <output-dir>/<prefix>.fixes}.
     *
     * @param badNumber number of bad k-mers, passed on to the dispatcher
     * @return one result map per worker
     */
    private ArrayLong2LongHashMap[] runAndGetResults(long badNumber) throws IOException, InterruptedException {
        List<String> prefixes = Arrays.asList(this.prefix);
        info("prefixes: \"" + this.prefix + "\"");

        long goodNumber = kmersNumberInFiles(".good", prefixes);
        float estimatedSetSize = ((float) goodNumber) / HASH_LOAD_FACTOR;
        debug("hash set size >= " + estimatedSetSize);

        // Both implementations are only used through LongSet; the big one is needed once the
        // estimated table size reaches 2^30.
        LongSet goodKmers;
        if (estimatedSetSize >= BIG_SET_THRESHOLD) {
            goodKmers = new LongOpenHashBigSet(goodNumber);
        } else {
            goodKmers = new LongOpenHashSet((int) goodNumber);
        }
        loadKMers(".good", prefixes, goodKmers);

        List<String> badFileNames = kmersFileNames(String.valueOf(this.DIR), prefixes, ".bad");
        LazyLongReader badKmers = new LazyLongReader(badFileNames.toArray(new String[0]));

        info("starting error correction...");
        ArrayLong2LongHashMap[] fixes = clean(goodKmers, badKmers, badNumber);

        info("dumping fixes");
        String fixesFile = this.outputDir.get().getAbsolutePath() + File.separator
                + Misc.join(prefixes, "_") + ".fixes";
        long dumped = dumpFixes(fixes, fixesFile);
        info("dumped " + dumped + " fixes");
        return fixes;
    }

    /** Writes the fixes of one map to {@code fileName}, overwriting an existing file. */
    private long dumpFixes(ArrayLong2LongHashMap arrayLong2LongHashMap, String fileName) throws IOException {
        return dumpFixes(arrayLong2LongHashMap, fileName, false);
    }

    /**
     * Writes every (key, value) pair of the map to {@code fileName} as two consecutive longs
     * and records the file as the tool output.
     *
     * @param fixes    map to dump
     * @param fileName destination file
     * @param append   {@code true} to append to the file, {@code false} to overwrite it
     * @return number of pairs written
     */
    private long dumpFixes(ArrayLong2LongHashMap fixes, String fileName, boolean append) throws IOException {
        long written = 0;
        debug("dumping " + fixes.size() + " fixes to " + fileName);
        // try-with-resources: the stream is flushed and closed even if a write fails.
        try (DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(fileName, append)))) {
            for (LongSet keys : fixes.keySets()) {
                // Primitive iteration avoids boxing every key.
                for (LongIterator keyIterator = keys.iterator(); keyIterator.hasNext(); ) {
                    long key = keyIterator.nextLong();
                    out.writeLong(key);
                    out.writeLong(fixes.get(key));
                    written++;
                }
            }
        }
        this.fixesFilesOutValue.set(new File[]{new File(fileName)});
        return written;
    }

    /**
     * Writes the fixes of all maps to one file: the first map overwrites the file, every
     * following map is appended.
     *
     * @return total number of pairs written
     */
    private long dumpFixes(ArrayLong2LongHashMap[] allFixes, String fileName) throws IOException {
        long written = 0;
        boolean append = false;
        for (ArrayLong2LongHashMap fixes : allFixes) {
            written += dumpFixes(fixes, fileName, append);
            append = true;
        }
        return written;
    }
}
