Source Code of net.jcores.jre.cores.Core

/*
 * Core.java
 * 
 * Copyright (c) 2010, Ralf Biedert All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 * 
 * Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer. Redistributions in binary form must reproduce the
 * above copyright notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * Neither the name of the author nor the names of its contributors may be used to endorse or
 * promote products derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 */
package net.jcores.jre.cores;


import java.io.Serializable;
import java.util.ListIterator;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicInteger;


import net.jcores.jre.CommonCore;
import net.jcores.jre.options.Option;
import net.jcores.jre.utils.internal.processing.Folder;
import net.jcores.jre.utils.internal.processing.Mapper;
import net.jcores.jre.utils.internal.structures.ProfileInformation;


/**
 * The abstract base class of all cores. Contains commonly used methods and variables. In
 * general
 * you should not need to bother with this class, as in most cases you will extend
 * CoreObject for
 * your own cores, not this class.
 * 
 * @since 1.0
 * @author Ralf Biedert
 */
public abstract class Core implements Serializable {


    /** Used for serialization */
    private static final long serialVersionUID = 2195880634253143587L;


    /** Our 'parent' core. */
    protected transient CommonCore commonCore;


    /**
     * Creates the core object for the given collection.
     * 
     * @param core
     */
    protected Core(CommonCore core) {
        this.commonCore = core;
    }


    /**
     * Returns the size of enclosed elements, counting null elements. The size of the core 
     * is equivalent to the size of the contained array.<br/><br/>
     * 
     * Examples:
     * <ul>
     * <li><code>$("a", "b", "c").size()</code> - Returns 3.</li>
     * </ul>
     * 
     * @return The number of element slots this core encloses.
     */
    public abstract int size();


    /**
     * Starts a new parallel mapping process.
     * 
     * @param mapper The mapper to use.
     * @param options Relevant options: <code>OptionMapType</code>.
     */
    @SuppressWarnings("rawtypes")
    protected void map(final Mapper mapper, final Option... options) {
        final int size = mapper.core().size();
        final CommonCore cc = this.commonCore;


        // Quick pass for the probably most common events
        if (size <= 0) return;
        if (size == 1) {
            mapper.handle(0);
            return;
        }
        
        // Compute the later step size and the number of threads.
        final ProfileInformation profileInfo = cc.profileInformation();
        final int STEP_SIZE = Math.max(size() / 10, 1);
        final AtomicInteger index = new AtomicInteger();
        
        // Test-convert the first item and measure time. If time and size are above
        // a certain threshold, parallelize, otherwise map sequentially. However, in here we 
        // only test the first one
        long delta = 0;


        final ListIterator iterator = mapper.core().iterator();
        while(iterator.hasNext()) {
            final int i = iterator.nextIndex();
            final Object o = iterator.next();
            
            // Skipp all null elements
            if(o == null) continue;
            
            // Set the base count to the next position we should consider (in case we break the look)
            index.set(i + 1);


            // Now map the given value
            final long start = System.nanoTime();
            mapper.handle(i);
            delta = System.nanoTime() - start;
            
            break;
        }
        
        // Next, we check if have a speed gain when we move parallel. In general, we do not 
        // have a speed gain when the time it takes to spawn threads takes longer than it would 
        // take to finish the loop single-threaded
        final int toGo = size - index.get();
        final long estTime = delta * toGo;


        // Request a CPU for each element we have (in case we have many, we only receive maxCPU, in case we have 
        // very few, we don't block all CPUs.
        final int NUM_THREADS = Math.min(toGo, this.commonCore.profileInformation().numCPUs); // cc.requestCPUs(toGo);
        
        // We use a safetey factor of 2 for the fork time (FIXME: Should investigate what's the best factor),
        // also, we only spawn something if there is more than one element still to go.
        if((estTime < 2 * profileInfo.forkTime && toGo > 1) || NUM_THREADS < 2) {
            // Instantly release all CPUs when we go singlethreaded
            // this.commonCore.releaseCPUs(NUM_THREADS);
            
            // In this case, we go single threaded
            while(iterator.hasNext()) {
                final int i = iterator.nextIndex();
                iterator.next(); // We need to get the next() that the nextIndex increases. 
                mapper.handle(i);
            }
            
            return;
        }




        // TODO: Get proper value for step size (same problem, see below)
        // TODO: Check size, if small, don't do all this setup in here ...
        // NAH, even for two objects we can have a speed gain if the calls
        // are very slow ...


        
        // Okay, in this case the loop was broken and we decided to go parallel. In that case
        // setup the barrier and spawn threads for all our processors so that we process the array.
        final CyclicBarrier barrier = new CyclicBarrier(NUM_THREADS + 1);
        final AtomicInteger baseCount = new AtomicInteger();


        final Runnable runner = new Runnable() {
            public void run() {
                int bc = baseCount.getAndIncrement() * STEP_SIZE;
                int lower = Math.max(index.get(), bc);
                


                // Get new basecount for every pass ...
                while (lower < size) {
                    final int max = Math.min(Math.min(lower + STEP_SIZE, size), bc + STEP_SIZE);


                    // Pass over all elements
                    for (int i = lower; i < max; i++) {
                        mapper.handle(i);
                    }


                    bc = baseCount.getAndIncrement() * STEP_SIZE;
                    lower = bc;
                }


                // Signal finish
                try {
                    barrier.await();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (BrokenBarrierException e) {
                    e.printStackTrace();
                }
            }
        };


        // Execute all our runnables.
        for(int i=0; i<NUM_THREADS; i++) {
            cc.executor().getExecutor().execute(runner);
        }


        // Wait for all threads to finish ...
        try {
            barrier.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (BrokenBarrierException e) {
            e.printStackTrace();
        }
        
        // Release all CPUs we used
        // cc.releaseCPUs(NUM_THREADS);
    }


    /**
     * Starts a parallel folding process.
     * 
     * @param folder The folder to use.
     * @param options Relevant options: <code>OptionMapType</code>.
     */
    @SuppressWarnings("rawtypes")
    protected void fold(final Folder folder, final Option... options) {
        final int size = folder.core().size();


        // Quick pass for the probably most common events
        if (size <= 1) return;
        if (size == 2) {
            folder.handle(0, 1, 0);
            return;
        }


        final int NUM_THREADS = this.commonCore.profileInformation().numCPUs; 


        // Indicates which level (in the folding hierarchy) we are and where the next
        // thread should proceed. The base count indicates where which element should be
        // selected next by the thread, the level indicates how many times we already passed
        // through the whole array.
        final AtomicInteger baseCount = new AtomicInteger();
        final AtomicInteger round = new AtomicInteger();


        // Synchronizes threads. Each thread waits at the level-barrier when it finished the last level,
        // and waits the the global barrier when it is completely done. The main thread will also
        // wait at the global barrier (thus +1) for all spawned threads.
        final CyclicBarrier levelbarrier = new CyclicBarrier(NUM_THREADS);
        final CyclicBarrier barrier = new CyclicBarrier(NUM_THREADS + 1);
        
        // Algorithm example for 10 elements and 4 CPUs :
        // Content: a b c d e f g h i j
        // Array:   _ _ _ _ _ _ _ _ _ _
        // Index:   0 1 2 3 4 5 6 7 8 9
        
        // In general, we process in levels, in parallel. In each level, each thread
        // takes the next two elements, merges them, and stores them in the position of 
        // the first element. This means, after each level the number of elements we need to 
        // consider halves.
        
        // 1) We create 4 runner threads
        // 2) Each runner starts by picking / computing
        //    2.1) The current level (0 in the beginning) - lvl
        //    2.2) The distance of between X and Y for the fold operation in this level 2^0 = 1
        // 3) Each level, we check if the distance is bigger than the actual size (10 in this 
        //    case) of the array. If it is, we know we have finished.
        // 4) The upper bound for each run is the size of the array minus the stepping distance
        //    (10 - 1) = 9
        // 5) The index X for this operation is the base count (0 * 1) = 0
        // 6) The index Y is X + dist (0 + 1) = 1
        // 7) Until we reached the upper bound:
        //    7.1) handle the two elements
        //    7.2) Take the next available elements (X, Y)
        // ...
        
        // Given fold = max
        
        // Level 0, dist = 1
        // 0 x 1 -> 0
        // Content: b b c d e f g h i j
        // 2 x 3 -> 2
        // Content: b b d d e f g h i j
        // 4 x 5 -> 4
        // Content: b b d d f f g h i j
        // 6 x 7 -> 6
        // Content: b b d d f f h h i j
        // 8 x 9 -> 8
        // Content: b b d d f f h h i i
        
        // Level 1, dist = 2
        // 0 x 2 -> 0
        // Content: d b d d f f h h i i
        // 4 x 6 -> 4
        // Content: d b d d h f h h i i
        // 4 x 8 -> 4
        // Content: d b d d h i h h i i


        // Level 2, dist = 4
        // 0 x 4 -> 4
        // Content: i b d d h i h h i i


        // Return index[0]
        
        
        final Runnable runner = new Runnable() {
            public void run() {
                int rnd = round.get();
                int elementDistance = (int) Math.pow(2, rnd);


                // Each thread processes as long as the distance between elements is smaller
                // than the size
                while (elementDistance < size) {
                  // Each round the upper bound gets lower as we don't have to consider
                  // the last elements
                    final int upperBound = size - elementDistance;


                    
                    // Now we take a next element pair 
                    int lastHandledI = -1;
                    int i = baseCount.getAndAdd(2) * elementDistance;
                    int j = i + elementDistance;


                    // And each thread loop in here, until the right element
                    // has left the righter bound
                    while (j <= upperBound) {
                      // Process two elements and store them
                        folder.handle(i, j, i);


                        // Remember what we processed last
                        lastHandledI = i;
                        
                        // Take the next available element pairs
                        i = baseCount.getAndAdd(2) * elementDistance;
                        j = i + elementDistance;
                    }


                    
                    // Ideally, at this point, we are through the array. However, it might be the case 
                    // that an uneven number of elements was in that loop run
                    // 10 -> 5 pairs
                    // 11 -> 5 pairs, one left
                    
                    // So we check now if we were the thread processing the last element pair
                    // in that turn, and if there was a sole element left
                    
                    // This is the case when we processed something (lastHandledI > 0)
                    // and when element 2nd element to the right (the next one) exists
                    // but its pair (the 3rd one) does not. 
                    
                    // In that case we connect this element wit the next one 
                    
                    if (lastHandledI + 2 * elementDistance <= upperBound && 
                          lastHandledI + 3 * elementDistance > upperBound &&
                          lastHandledI >= 0) {
                          folder.handle(lastHandledI, lastHandledI + 2 * elementDistance, lastHandledI);
                    }
                
                    // At this point we finished the round
                    try {
                        levelbarrier.await();
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    } catch (BrokenBarrierException e) {
                        e.printStackTrace();
                    }
                    
                    
                    // If we were the one who changed the level, we also change the baseCount 
                    // back to 0
                    if (round.compareAndSet(rnd, rnd + 1)) {
                        baseCount.set(0);
                    }
                    
                   
                    // We first have to wait twice, as otherwise an other thread
                    // might be quicker than us in returning up to the loop again and use the
                    // old level value 
                    


                    // Now we all wait that the level has been updated
                    try {
                        levelbarrier.await();
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    } catch (BrokenBarrierException e) {
                        e.printStackTrace();
                    }
                    
                    // And get new parameters
                    rnd = round.get();
                    elementDistance = (int) Math.pow(2, rnd);
                }


                try {
                    barrier.await();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (BrokenBarrierException e) {
                    e.printStackTrace();
                }
            }
        };


        // Execute all our runnables.
        for(int i=0; i<NUM_THREADS; i++) {
            this.commonCore.executor().getExecutor().execute(runner);
        }


        // Wait for all threads to finish ...
        try {
            barrier.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (BrokenBarrierException e) {
            e.printStackTrace();
        }
    }
}
Source Code of net.jcores.jre.cores.Core

Related Classes of net.jcores.jre.cores.Core