User Tools

Site Tools


This is an old revision of the document!

@inproceedings{Zou:2015:AEE:2751205.2751245,
  author    = {Zou, Yun and Rajopadhye, Sanjay},
  title     = {Automatic Energy Efficient Parallelization of Uniform Dependence Computations},
  booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing},
  series    = {ICS '15},
  year      = {2015},
  isbn      = {978-1-4503-3559-1},
  location  = {Newport Beach, California, USA},
  pages     = {373--382},
  numpages  = {10},
  doi       = {10.1145/2751205.2751245},
  acmid     = {2751245},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {automatic parallelization, energy consumption, hierarchical tiling, off-chip memory access, polyhedral model},
}

@inproceedings{Yuki:2015:RLT:2771774.2771778,
  author    = {Yuki, Tomofumi},
  title     = {Revisiting Loop Transformations with {X10} Clocks},
  booktitle = {Proceedings of the ACM SIGPLAN Workshop on X10},
  series    = {X10 2015},
  year      = {2015},
  isbn      = {978-1-4503-3586-7},
  location  = {Portland, OR, USA},
  pages     = {1--6},
  numpages  = {6},
  doi       = {10.1145/2771774.2771778},
  acmid     = {2771778},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {X10, affine schedule, clocks, loop transformation, parallel programming, synchronization, unimodular framework},
}

@inproceedings{Elango:2014:CDM:2612669.2612694,
  author    = {Elango, Venmugil and Rastello, Fabrice and Pouchet, Louis-No{\"e}l and Ramanujam, J. and Sadayappan, P.},
  title     = {On Characterizing the Data Movement Complexity of Computational {DAGs} for Parallel Execution},
  booktitle = {Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures},
  series    = {SPAA '14},
  year      = {2014},
  isbn      = {978-1-4503-2821-0},
  location  = {Prague, Czech Republic},
  pages     = {296--306},
  numpages  = {11},
  doi       = {10.1145/2612669.2612694},
  acmid     = {2612694},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {i/o complexity, lower bounds, parallel data movement complexity, red-blue pebble game},
}

@inproceedings{Aloor:2015:UWM:2751205.2751238,
  author    = {Aloor, Raghesh and Nandivada, V. Krishna},
  title     = {Unique Worker Model for {OpenMP}},
  booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing},
  series    = {ICS '15},
  year      = {2015},
  isbn      = {978-1-4503-3559-1},
  location  = {Newport Beach, California, USA},
  pages     = {47--56},
  numpages  = {10},
  doi       = {10.1145/2751205.2751238},
  acmid     = {2751238},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {barrier synchronization, multi-core, openmp, parallel-for loops},
}

@inproceedings{Sharma:2015:VPS:2737924.2737962,
  author    = {Sharma, Rahul and Bauer, Michael and Aiken, Alex},
  title     = {Verification of Producer-Consumer Synchronization in {GPU} Programs},
  booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation},
  series    = {PLDI 2015},
  year      = {2015},
  isbn      = {978-1-4503-3468-6},
  location  = {Portland, OR, USA},
  pages     = {88--98},
  numpages  = {11},
  doi       = {10.1145/2737924.2737962},
  acmid     = {2737962},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {GPUs, Verification, barrier recycling, data races, deadlock, named barriers, synchronization, warp specialization},
}

@inproceedings{Stengel:2015:QPB:2751205.2751240,
  author    = {Stengel, Holger and Treibig, Jan and Hager, Georg and Wellein, Gerhard},
  title     = {Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model},
  booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing},
  series    = {ICS '15},
  year      = {2015},
  isbn      = {978-1-4503-3559-1},
  location  = {Newport Beach, California, USA},
  pages     = {207--216},
  numpages  = {10},
  doi       = {10.1145/2751205.2751240},
  acmid     = {2751240},
  publisher = {ACM},
  address   = {New York, NY, USA},
  keywords  = {multicore, optimization, performance model, stencils},
}

melange/papers/summer2015.1435870592.txt.gz · Last modified: 2015/07/02 14:56 by waruna