package ru.ifmo.genetics.transcriptome;

import it.unimi.dsi.fastutil.longs.Long2IntMap;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import ru.ifmo.genetics.dna.kmers.KmerIteratorFactory;
import ru.ifmo.genetics.dna.kmers.ShortKmerIteratorFactory;
import ru.ifmo.genetics.structures.map.ArrayLong2IntHashMap;
import ru.ifmo.genetics.tools.ec.DnaQReadDispatcher;
import ru.ifmo.genetics.tools.ec.KmerLoadWorker;
import ru.ifmo.genetics.tools.io.LazyBinqReader;
import ru.ifmo.genetics.utils.Misc;
import ru.ifmo.genetics.utils.NumUtils;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileMVParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.IntParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.KmerIteratorFactoryParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.LongParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.ParameterBuilder;
import ru.ifmo.genetics.utils.tool.values.InMemoryValue;
import ru.ifmo.genetics.utils.tool.values.InValue;

/* loaded from: input_file:ru/ifmo/genetics/transcriptome/TranscriptomeKmerStatisticsGatherer.class */
/**
 * Tool that counts k-mers in the input reads and writes the frequent ("good") ones to disk.
 *
 * <p>The reads are first loaded into a hash map bounded by {@code max-size}. If that load reports
 * a non-zero prefix length, the reads are loaded again once per k-mer prefix of that length, and
 * each pass is dumped into its own file. A k-mer whose count does not exceed
 * {@link #BAD_KMER_THRESHOLD} is only counted as bad; every other k-mer is written as a
 * {@code (long kmer, int count)} record to {@code kmers<prefix>.good} in the output directory.
 * The processed prefixes are listed, one per line, in the prefixes file.
 */
public class TranscriptomeKmerStatisticsGatherer extends Tool {
    public static final String NAME = "kmer-statistics-gatherer";
    public static final String DESCRIPTION = "differentiates good kmers from bad ones";

    /** Task size handed to the {@link DnaQReadDispatcher} that feeds the load workers. */
    static final int LOAD_TASK_SIZE = 32768;

    /** K-mers seen at most this many times are counted as bad and are not written out. */
    private static final int BAD_KMER_THRESHOLD = 1;

    // NOTE(review): this parameter is registered but never read in this class; the threshold
    // actually applied is the fixed BAD_KMER_THRESHOLD. Confirm whether that is intentional.
    public final Parameter<Integer> maximalBadFrequence;
    public final Parameter<Integer> k;
    public final Parameter<Long> maxSize;
    public final Parameter<File[]> inputFiles;
    public final Parameter<File> outputDir;
    public final Parameter<File> prefixesFile;
    // NOTE(review): unlike the other parameters this one is built with Parameter.createParameter
    // and is not passed to addParameter - confirm that it is meant to stay unregistered.
    public final Parameter<KmerIteratorFactory> kmerIteratorFactory;

    private final InMemoryValue<Long> badKmersNumberOutValue;
    public final InValue<Long> badKmersNumberOut;
    private final InMemoryValue<Long> readsNumberOutValue;
    public final InValue<Long> readsNumberOut;

    /** Prefix length reported by the workers of the most recent {@code load} call. */
    private int prefixLength;
    /** Prefix mask reported by the workers of the most recent {@code load} call. */
    private long prefixMask;
    /** K-mer length, read from the {@code k} parameter when the tool runs. */
    private int LEN;
    /** Mask with the low {@code 2 * LEN} bits set. */
    private long MASK;
    /** Value of the {@code max-size} parameter, passed to the initial load. */
    private long MAX_SIZE;
    /** Number of k-mers written out by {@link #dumpKmers} so far. */
    private long totalGood;
    /** Number of k-mers skipped as bad by {@link #dumpKmers} so far. */
    private long totalBad;
    // Not referenced anywhere in this class.
    private static final Random random = new Random(1989);

    /**
     * Loads the k-mers, dumps the good ones (per prefix when the load was split) and publishes
     * the number of bad k-mers through {@link #badKmersNumberOut}.
     *
     * @throws ExecutionFailedException if the output directory cannot be created, or if loading
     *         the reads or writing any output file fails
     */
    @Override
    protected void runImpl() throws ExecutionFailedException {
        this.LEN = this.k.get().intValue();
        // The shift must be done on a long: an int shift takes its distance mod 32 and yields a
        // wrong mask for k >= 16. A long shift takes it mod 64, hence the explicit full mask.
        int kmerBits = 2 * this.LEN;
        this.MASK = kmerBits >= Long.SIZE ? -1L : (1L << kmerBits) - 1;
        this.MAX_SIZE = this.maxSize.get().longValue();

        // Create missing parent directories too, and fail before the expensive load instead of
        // on the first dump file.
        File outDir = this.outputDir.get();
        if (!outDir.mkdirs() && !outDir.isDirectory()) {
            throw new ExecutionFailedException(
                    new IOException("Couldn't create output directory " + outDir));
        }
        debug("MAXIMAL_SIZE = " + this.MAX_SIZE);

        ArrayLong2IntHashMap allKmers;
        try {
            allKmers = load(this.inputFiles.get(), this.MAX_SIZE);
        } catch (IOException e) {
            throw new ExecutionFailedException("Couldn't load kmers", e);
        }

        info("threshold = " + BAD_KMER_THRESHOLD);
        if (this.prefixLength != 0) {
            // The first pass is not dumped in this case; drop the reference before reloading.
            allKmers = null;
        }
        long prefixCount = 1L << (2 * this.prefixLength);
        String outPath = outDir.getAbsolutePath();

        try (PrintWriter prefixesOut = new PrintWriter(this.prefixesFile.get())) {
            for (long prefix = 0; prefix < prefixCount; prefix++) {
                // Computed once so the prefixes file and the dump file names always agree,
                // even though load() rewrites this.prefixLength.
                String prefixName = Misc.getString(prefix, this.prefixLength);
                long shiftedPrefix = prefix << (2 * (this.LEN - this.prefixLength));

                prefixesOut.println(prefixName);
                info("processing prefix: \"" + prefixName + "\"");

                ArrayLong2IntHashMap kmers = this.prefixLength == 0
                        ? allKmers
                        : load(this.inputFiles.get(), Long.MAX_VALUE, shiftedPrefix,
                                this.prefixMask, this.prefixLength);
                debug("loaded " + kmers.size() + " kmers");

                String fileBase = outPath + File.separator + "kmers" + prefixName;
                dumpKmers(kmers.hm, BAD_KMER_THRESHOLD, fileBase + ".good", fileBase + ".bad");
                debug(prefixName + " done");
            }
        } catch (IOException e) {
            throw new ExecutionFailedException(e);
        }

        info("total good kmers: " + this.totalGood);
        info("total bad kmers:  " + this.totalBad);
        this.badKmersNumberOutValue.set(Long.valueOf(this.totalBad));
    }

    @Override
    protected void clean() {
    }

    public static void main(String[] strArr) {
        new TranscriptomeKmerStatisticsGatherer().mainImpl(strArr);
    }

    /** Declares the tool's parameters and outputs. */
    public TranscriptomeKmerStatisticsGatherer() {
        super(NAME, DESCRIPTION);
        this.maximalBadFrequence = addParameter(new IntParameterBuilder("maximal-bad-frequence")
                .optional()
                .withShortOpt("b")
                .withDescription("maximal frequency for a kmer to be assumed erroneous")
                .create());
        this.k = addParameter(new IntParameterBuilder("k")
                .mandatory()
                .withShortOpt("k")
                .withDescription("k-mer size")
                .create());
        this.maxSize = addParameter(new LongParameterBuilder("max-size")
                .optional()
                .withDescription("maximal hashset size")
                .withDefaultValue(Long.valueOf(NumUtils.highestBits(Misc.availableMemory() / 42, 3)))
                .create());
        this.inputFiles = addParameter(new FileMVParameterBuilder("reads")
                .mandatory()
                .withDescription("list of input files")
                .create());
        this.outputDir = addParameter(new FileParameterBuilder("output-dir")
                .withShortOpt("o")
                .withDescription("directory to place output files")
                .withDefaultValue(this.workDir.append("kmers"))
                .create());
        this.prefixesFile = addParameter(new FileParameterBuilder("prefixes-file")
                .withDescription("file with prefixes")
                .withDefaultValue(this.workDir.append("prefixes"))
                .create());
        this.kmerIteratorFactory = Parameter.createParameter(
                new KmerIteratorFactoryParameterBuilder("kmer-iterator-factory")
                        .optional()
                        .withDescription("factory used for iterating through kmers")
                        .withDefaultValue(new ShortKmerIteratorFactory())
                        .create());
        this.badKmersNumberOutValue = new InMemoryValue<>();
        this.badKmersNumberOut = addOutput("bad-kmers-number", this.badKmersNumberOutValue, Long.class);
        this.readsNumberOutValue = new InMemoryValue<>();
        this.readsNumberOut = addOutput("reads-number", this.readsNumberOutValue, Long.class);
        this.totalGood = 0L;
        this.totalBad = 0L;
    }

    /**
     * Loads k-mers from {@code files} with no prefix restriction (prefix, mask and prefix
     * length all zero).
     *
     * @param files   reads files to load
     * @param maxSize size limit handed to the load workers
     * @return the map filled by the workers
     * @throws IOException if the reads cannot be read
     */
    ArrayLong2IntHashMap load(File[] files, long maxSize) throws IOException {
        return load(files, maxSize, 0L, 0L, 0);
    }

    /**
     * Loads k-mers from {@code files} into a fresh map using one {@link KmerLoadWorker} thread
     * per available processor, and waits for all of them to finish.
     *
     * <p>Side effects: overwrites {@link #prefixMask} and {@link #prefixLength} with the values
     * reported by the first worker, and publishes the dispatcher's read count through
     * {@link #readsNumberOut}.
     *
     * <p>If the calling thread is interrupted while waiting, the workers are interrupted, the
     * thread's interrupt status is restored, and the map is returned in whatever state it has
     * reached.
     *
     * @param files     reads files to load
     * @param maxSize   size limit handed to the load workers
     * @param prefix    k-mer prefix handed to the load workers
     * @param mask      prefix mask handed to the load workers
     * @param prefixLen prefix length handed to the load workers
     * @return the map filled by the workers
     * @throws IOException if the reads cannot be read
     */
    ArrayLong2IntHashMap load(File[] files, long maxSize, long prefix, long mask, int prefixLen)
            throws IOException {
        int threads = this.availableProcessors.get().intValue();
        // floor(log2(threads)) + 4, computed on integers to avoid floating-point rounding.
        int mapSizeLog = (31 - Integer.numberOfLeadingZeros(threads)) + 4;
        ArrayLong2IntHashMap kmers = new ArrayLong2IntHashMap(mapSizeLog);

        DnaQReadDispatcher dispatcher =
                new DnaQReadDispatcher(new LazyBinqReader(files), LOAD_TASK_SIZE, this.progress);
        KmerLoadWorker[] workers = new KmerLoadWorker[threads];
        CountDownLatch allDone = new CountDownLatch(workers.length);
        for (int w = 0; w < workers.length; w++) {
            workers[w] = new KmerLoadWorker(dispatcher, allDone, new Random(42L), this.LEN,
                    maxSize, kmers, prefix, mask, prefixLen, this.kmerIteratorFactory.get());
            new Thread(workers[w]).start();
        }

        try {
            allDone.await();
        } catch (InterruptedException e) {
            warn("Main thread interrupted");
            for (KmerLoadWorker worker : workers) {
                worker.interrupt();
            }
            // Keep the interruption visible to whoever runs this tool.
            Thread.currentThread().interrupt();
        }

        info("loaded");
        this.prefixMask = workers[0].getPrefixMask();
        this.prefixLength = workers[0].getPrefixLength();
        this.readsNumberOutValue.set(Long.valueOf(dispatcher.getReads()));
        return kmers;
    }

    /**
     * Writes every k-mer whose count exceeds {@code threshold} to {@code goodFile} as a
     * {@code long} key followed by its {@code int} count, and updates {@link #totalGood} and
     * {@link #totalBad}.
     *
     * @param maps      maps from k-mer to count
     * @param threshold largest count that still marks a k-mer as bad
     * @param goodFile  path of the file that receives the good k-mers; it is overwritten
     * @param badFile   accepted for symmetry but not used - bad k-mers are only counted
     * @throws IOException if {@code goodFile} cannot be opened or written
     */
    void dumpKmers(Long2IntMap[] maps, int threshold, String goodFile, String badFile)
            throws IOException {
        try (DataOutputStream goodOut = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(goodFile)))) {
            for (Long2IntMap map : maps) {
                // Entry iteration reads key and count together: no boxing, no second lookup.
                for (Long2IntMap.Entry entry : map.long2IntEntrySet()) {
                    int count = entry.getIntValue();
                    if (count <= threshold) {
                        this.totalBad++;
                    } else {
                        goodOut.writeLong(entry.getLongKey());
                        goodOut.writeInt(count);
                        this.totalGood++;
                    }
                }
            }
        }
    }
}
