package ru.ifmo.genetics.distributed.clusterization.bipartite;

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import ru.ifmo.genetics.distributed.clusterization.types.ComponentID;
import ru.ifmo.genetics.distributed.clusterization.types.Kmer;
import ru.ifmo.genetics.distributed.clusterization.types.Vertex;
import ru.ifmo.genetics.distributed.clusterization.types.VertexOrKmerOrComponentIDWritable;
import ru.ifmo.genetics.distributed.clusterization.types.VertexOrKmerWritableComparable;
import ru.ifmo.genetics.distributed.io.writable.Int128WritableComparable;

/* loaded from: input_file:ru/ifmo/genetics/distributed/clusterization/bipartite/BfsCleanerTask.class */
public class BfsCleanerTask {
    public static final String MAXIMUM_SIZE = "MAXIMUM_SIZE";
    public static final String DEFAULT_MAXIMUM_SIZE = "1000000";

    /* loaded from: input_file:ru/ifmo/genetics/distributed/clusterization/bipartite/BfsCleanerTask$CleanerMapper.class */
    /**
     * Map stage of the cleaning job: re-keys every input record by the component ID
     * stored in the third alternative of the input value, so that all members of one
     * component arrive at the same reduce call.
     */
    private static class CleanerMapper extends MapReduceBase
            implements Mapper<VertexOrKmerWritableComparable, VertexOrKmerOrComponentIDWritable, ComponentID, VertexOrKmerWritableComparable> {

        /** Output key instance, overwritten and re-emitted on every {@code map} call. */
        ComponentID componentID = new ComponentID();

        /** Never called directly in this file; presumably the framework instantiates it reflectively. */
        private CleanerMapper() {
        }

        /**
         * Emits the input key under the component ID read from the input value.
         *
         * @param vertexOrKmerWritableComparable input key; forwarded unchanged as the output value
         * @param vertexOrKmerOrComponentIDWritable input value; its third alternative supplies the component ID
         * @param outputCollector sink receiving the (component ID, input key) pair
         * @param reporter unused
         * @throws IOException if the collector fails to accept the pair
         */
        @Override
        public void map(VertexOrKmerWritableComparable vertexOrKmerWritableComparable, VertexOrKmerOrComponentIDWritable vertexOrKmerOrComponentIDWritable, OutputCollector<ComponentID, VertexOrKmerWritableComparable> outputCollector, Reporter reporter) throws IOException {
            Int128WritableComparable sourceId = (Int128WritableComparable) vertexOrKmerOrComponentIDWritable.getThird();
            // Copy into the reusable key rather than emitting the value's own object.
            componentID.copyFieldsFrom(sourceId);
            outputCollector.collect(componentID, vertexOrKmerWritableComparable);
        }
    }

    /* loaded from: input_file:ru/ifmo/genetics/distributed/clusterization/bipartite/BfsCleanerTask$CleanerReducer.class */
    /**
     * Reduce stage of the cleaning job: collects the distinct vertexes and k-mers that
     * belong to one component and re-emits them, each paired with the component ID,
     * unless the component contains more than {@link #maximumSize} distinct vertexes,
     * in which case nothing is emitted for that component.
     */
    private static class CleanerReducer extends MapReduceBase implements Reducer<ComponentID, VertexOrKmerWritableComparable, VertexOrKmerWritableComparable, VertexOrKmerOrComponentIDWritable> {
        /** Largest number of distinct vertexes a component may hold and still be kept. */
        int maximumSize;
        /** Distinct vertexes of the component currently being reduced; cleared on every call. */
        Set<Vertex> vertexes = new HashSet<Vertex>();
        /** Distinct k-mers of the component currently being reduced; cleared on every call. */
        Set<Kmer> kmers = new HashSet<Kmer>();
        /** Reusable output key. */
        VertexOrKmerWritableComparable outKey = new VertexOrKmerWritableComparable();
        /** Reusable output value; carries the component ID. */
        VertexOrKmerOrComponentIDWritable outValue = new VertexOrKmerOrComponentIDWritable();

        /** Never called directly in this file; presumably the framework instantiates it reflectively. */
        private CleanerReducer() {
        }

        /**
         * Reads the size threshold from the job configuration, falling back to
         * {@link BfsCleanerTask#DEFAULT_MAXIMUM_SIZE} when the key is absent.
         *
         * @param jobConf configuration of the running job
         * @throws NumberFormatException if the configured value is not a decimal integer
         */
        @Override // org.apache.hadoop.mapred.MapReduceBase, org.apache.hadoop.mapred.JobConfigurable
        public void configure(JobConf jobConf) {
            this.maximumSize = Integer.parseInt(jobConf.get(BfsCleanerTask.MAXIMUM_SIZE, BfsCleanerTask.DEFAULT_MAXIMUM_SIZE));
        }

        /**
         * Buffers all members of one component and writes them out if the component is
         * small enough.
         *
         * @param componentID ID of the component being processed
         * @param it2 members of the component, each either a vertex or a k-mer
         * @param outputCollector sink receiving one (member, component ID) pair per distinct member
         * @param reporter unused
         * @throws IOException if the collector fails to accept a pair
         */
        @Override // org.apache.hadoop.mapred.Reducer
        public void reduce(ComponentID componentID, Iterator<VertexOrKmerWritableComparable> it2, OutputCollector<VertexOrKmerWritableComparable, VertexOrKmerOrComponentIDWritable> outputCollector, Reporter reporter) throws IOException {
            this.vertexes.clear();
            this.kmers.clear();
            while (it2.hasNext()) {
                VertexOrKmerWritableComparable next = it2.next();
                // Members are copied before being stored; presumably the framework
                // reuses the object handed out by the iterator.
                if (next.isFirst()) {
                    this.vertexes.add(new Vertex((Int128WritableComparable) next.getFirst()));
                    if (this.vertexes.size() > this.maximumSize) {
                        // Too many vertexes: abandon the whole component. The k-mer count
                        // printed here covers only the values consumed so far.
                        System.err.println("drop " + componentID + ": vertexes.size() = " + this.vertexes.size() + ", kmers.size() = " + this.kmers.size());
                        return;
                    }
                } else {
                    this.kmers.add(new Kmer((Kmer) next.getSecond()));
                }
            }
            System.err.println("save vertexes.size() = " + this.vertexes.size() + ", kmers.size() = " + this.kmers.size());
            // Every emitted record carries the same component ID as its value.
            this.outValue.setThird(componentID);
            for (Vertex vertex : this.vertexes) {
                this.outKey.setFirst(vertex);
                outputCollector.collect(this.outKey, this.outValue);
            }
            for (Kmer kmer : this.kmers) {
                this.outKey.setSecond(kmer);
                outputCollector.collect(this.outKey, this.outValue);
            }
        }
    }

    /**
     * Configures and submits the "Clean BFS data" job, which uses {@code CleanerMapper}
     * and {@code CleanerReducer} to rewrite the input without the components whose
     * vertex count exceeds the given limit.
     *
     * @param path input location, read as sequence files
     * @param path2 output location, written as sequence files
     * @param i size limit, stored in the job configuration under {@link #MAXIMUM_SIZE}
     * @throws IOException if {@code JobClient.runJob} reports a failure
     */
    public static void dropLargeComponents(Path path, Path path2, int i) throws IOException {
        JobConf conf = new JobConf(BfsCleanerTask.class);
        conf.setJobName("Clean BFS data");
        conf.set(MAXIMUM_SIZE, Integer.toString(i));

        // Processing classes.
        conf.setMapperClass(CleanerMapper.class);
        conf.setReducerClass(CleanerReducer.class);

        // Intermediate (map output) record types.
        conf.setMapOutputKeyClass(ComponentID.class);
        conf.setMapOutputValueClass(VertexOrKmerWritableComparable.class);

        // Final (reduce output) record types.
        conf.setOutputKeyClass(VertexOrKmerWritableComparable.class);
        conf.setOutputValueClass(VertexOrKmerOrComponentIDWritable.class);

        // Storage formats and locations.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(conf, path);
        FileOutputFormat.setOutputPath(conf, path2);

        JobClient.runJob(conf);
    }
}
