This shows you the differences between two versions of the page.
Next revision | Previous revision Next revision Both sides next revision | ||
melange:papers:summer2015 [2015/07/02 14:04] waruna created |
melange:papers:summer2015 [2015/07/02 16:27] waruna |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | @inproceedings{Zou:2015:AEE:2751205.2751245, | ||
+ | author = {Zou, Yun and Rajopadhye, Sanjay}, | ||
+ | title = {Automatic Energy Efficient Parallelization of Uniform Dependence Computations}, | ||
+ | booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, | ||
+ | series = {ICS '15}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3559-1}, | ||
+ | location = {Newport Beach, California, USA}, | ||
+ | pages = {373--382}, | ||
+ | numpages = {10}, | ||
+ | url = {http://doi.acm.org/10.1145/2751205.2751245}, | ||
+ | doi = {10.1145/2751205.2751245}, | ||
+ | acmid = {2751245}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {automatic parallelization, energy consumption, hierarchical tiling, off-chip memory access, polyhedral model}, | ||
+ | } | ||
+ | |||
+ | @INPROCEEDINGS{6718367, | ||
+ | author={Yuki, T. and Morvan, A. and Derrien, S.}, | ||
+ | booktitle={Field-Programmable Technology (FPT), 2013 International Conference on}, | ||
+ | title={Derivation of efficient FSM from loop nests}, | ||
+ | year={2013}, | ||
+ | month=dec, | ||
+ | pages={286--293}, | ||
+ | keywords={finite state machines;pipeline processing;HLS;efficient FSM;efficient finite state machine;hardware utilization rate;high level synthesis tools;nested loop pipelining;Automata;Complexity theory;Degradation;Hardware;Optimization;Pipeline processing;Vectors}, | ||
+ | doi={10.1109/FPT.2013.6718367},} | ||
+ | |||
+ | @inproceedings{Yuki:2015:RLT:2771774.2771778, | ||
+ | author = {Yuki, Tomofumi}, | ||
+ | title = {Revisiting Loop Transformations with {X10} Clocks}, | ||
+ | booktitle = {Proceedings of the ACM SIGPLAN Workshop on X10}, | ||
+ | series = {X10 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3586-7}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {1--6}, | ||
+ | numpages = {6}, | ||
+ | url = {http://doi.acm.org/10.1145/2771774.2771778}, | ||
+ | doi = {10.1145/2771774.2771778}, | ||
+ | acmid = {2771778}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {X10, affine schedule, clocks, loop transformation, parallel programming, synchronization, unimodular framework}, | ||
+ | } | ||
+ | |||
@inproceedings{Elango:2014:CDM:2612669.2612694, | @inproceedings{Elango:2014:CDM:2612669.2612694, | ||
- | author = {Elango, Venmugil and Rastello, Fabrice and Pouchet, Louis-No\"{e}l and Ramanujam, J. and Sadayappan, P.}, | + | author = {Elango, Venmugil and Rastello, Fabrice and Pouchet, Louis-Noël and Ramanujam, J. and Sadayappan, P.}, |
title = {On Characterizing the Data Movement Complexity of Computational DAGs for Parallel Execution}, | title = {On Characterizing the Data Movement Complexity of Computational DAGs for Parallel Execution}, | ||
booktitle = {Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures}, | booktitle = {Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures}, | ||
Line 52: | Line 98: | ||
keywords = {GPUs, Verification, barrier recycling, data races, deadlock, named barriers, synchronization, warp specialization}, | keywords = {GPUs, Verification, barrier recycling, data races, deadlock, named barriers, synchronization, warp specialization}, | ||
} | } | ||
+ | |||
+ | @inproceedings{Stengel:2015:QPB:2751205.2751240, | ||
+ | author = {Stengel, Holger and Treibig, Jan and Hager, Georg and Wellein, Gerhard}, | ||
+ | title = {Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model}, | ||
+ | booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, | ||
+ | series = {ICS '15}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3559-1}, | ||
+ | location = {Newport Beach, California, USA}, | ||
+ | pages = {207--216}, | ||
+ | numpages = {10}, | ||
+ | url = {http://doi.acm.org/10.1145/2751205.2751240}, | ||
+ | doi = {10.1145/2751205.2751240}, | ||
+ | acmid = {2751240}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {multicore, optimization, performance model, stencils}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Mehta:2015:ICS:2737924.2737954, | ||
+ | author = {Mehta, Sanyam and Yew, Pen-Chung}, | ||
+ | title = {Improving Compiler Scalability: Optimizing Large Programs at Small Price}, | ||
+ | booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, | ||
+ | series = {PLDI 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3468-6}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {143--152}, | ||
+ | numpages = {10}, | ||
+ | url = {http://doi.acm.org/10.1145/2737924.2737954}, | ||
+ | doi = {10.1145/2737924.2737954}, | ||
+ | acmid = {2737954}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {Compiler scalability, O-molecule, optimization, polyhedral model, statement condensation}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Ding:2015:OOA:2737924.2737989, | ||
+ | author = {Ding, Wei and Tang, Xulong and Kandemir, Mahmut and Zhang, Yuanrui and Kultursay, Emre}, | ||
+ | title = {Optimizing Off-chip Accesses in Multicores}, | ||
+ | booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, | ||
+ | series = {PLDI 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3468-6}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {131--142}, | ||
+ | numpages = {12}, | ||
+ | url = {http://doi.acm.org/10.1145/2737924.2737989}, | ||
+ | doi = {10.1145/2737924.2737989}, | ||
+ | acmid = {2737989}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {Manycores, memory controller, off-chip accesses localization}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Mendis:2015:HLH:2737924.2737974, | ||
+ | author = {Mendis, Charith and Bosboom, Jeffrey and Wu, Kevin and Kamil, Shoaib and Ragan-Kelley, Jonathan and Paris, Sylvain and Zhao, Qin and Amarasinghe, Saman}, | ||
+ | title = {Helium: Lifting High-performance Stencil Kernels from Stripped x86 Binaries to Halide DSL Code}, | ||
+ | booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, | ||
+ | series = {PLDI 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3468-6}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {391--402}, | ||
+ | numpages = {12}, | ||
+ | url = {http://doi.acm.org/10.1145/2737924.2737974}, | ||
+ | doi = {10.1145/2737924.2737974}, | ||
+ | acmid = {2737974}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {Helium, autotuning, dynamic analysis, image processing, reverse engineering, stencil computation, x86 binary instrumentation}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Sivaramakrishnan:2015:DPO:2737924.2737981, | ||
+ | author = {Sivaramakrishnan, KC and Kaki, Gowtham and Jagannathan, Suresh}, | ||
+ | title = {Declarative Programming over Eventually Consistent Data Stores}, | ||
+ | booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, | ||
+ | series = {PLDI 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3468-6}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {413--424}, | ||
+ | numpages = {12}, | ||
+ | url = {http://doi.acm.org/10.1145/2737924.2737981}, | ||
+ | doi = {10.1145/2737924.2737981}, | ||
+ | acmid = {2737981}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {Availability, Axiomatic Contracts, CRDTs, Cassandra, Contract Classification, Decidable Logic, Distributed Transactions, Eventual Consistency, Haskell, Quelea, SMT solvers}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Venkat:2015:LDT:2737924.2738003, | ||
+ | author = {Venkat, Anand and Hall, Mary and Strout, Michelle}, | ||
+ | title = {Loop and Data Transformations for Sparse Matrix Code}, | ||
+ | booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, | ||
+ | series = {PLDI 2015}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3468-6}, | ||
+ | location = {Portland, OR, USA}, | ||
+ | pages = {521--532}, | ||
+ | numpages = {12}, | ||
+ | url = {http://doi.acm.org/10.1145/2737924.2738003}, | ||
+ | doi = {10.1145/2737924.2738003}, | ||
+ | acmid = {2738003}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {inspector/executor, loop transformations, non-affine, polyhedral model, sparse matrices}, | ||
+ | } | ||
+ | |||
+ | |||
+ | @article{Xipeng2013, | ||
+ | year={2013}, | ||
+ | issn={0885-7458}, | ||
+ | journal={International Journal of Parallel Programming}, | ||
+ | volume={41}, | ||
+ | number={6}, | ||
+ | doi={10.1007/s10766-012-0236-3}, | ||
+ | title={An Infrastructure for Tackling Input-Sensitivity of GPU Program Optimizations}, | ||
+ | url={http://dx.doi.org/10.1007/s10766-012-0236-3}, | ||
+ | publisher={Springer US}, | ||
+ | keywords={GPU; Program Optimizations; Empirical Search; CUDA; G-ADAPT; Cross-input Adaptation}, | ||
+ | author={Shen, Xipeng and Liu, Yixun and Zhang, Eddy Z. and Bhamidipati, Poornima}, | ||
+ | pages={855--869}, | ||
+ | language={English} | ||
+ | } | ||
+ | |||
+ | @inproceedings{Bertolacci:2015:PDT:2751205.2751226, | ||
+ | author = {Bertolacci, Ian J. and Olschanowsky, Catherine and Harshbarger, Ben and Chamberlain, Bradford L. and Wonnacott, David G. and Strout, Michelle Mills}, | ||
+ | title = {Parameterized Diamond Tiling for Stencil Computations with Chapel Parallel Iterators}, | ||
+ | booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, | ||
+ | series = {ICS '15}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3559-1}, | ||
+ | location = {Newport Beach, California, USA}, | ||
+ | pages = {197--206}, | ||
+ | numpages = {10}, | ||
+ | url = {http://doi.acm.org/10.1145/2751205.2751226}, | ||
+ | doi = {10.1145/2751205.2751226}, | ||
+ | acmid = {2751226}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {chapel, diamond tiling, parallel iterators, separation of concerns, stencil computations}, | ||
+ | } | ||
+ | |||
+ | |||
+ | @inproceedings{Wu:2015:EEF:2751205.2751213, | ||
+ | author = {Wu, Bo and Chen, Guoyang and Li, Dong and Shen, Xipeng and Vetter, Jeffrey}, | ||
+ | title = {Enabling and Exploiting Flexible Task Assignment on GPU Through SM-Centric Program Transformations}, | ||
+ | booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, | ||
+ | series = {ICS '15}, | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4503-3559-1}, | ||
+ | location = {Newport Beach, California, USA}, | ||
+ | pages = {119--130}, | ||
+ | numpages = {12}, | ||
+ | url = {http://doi.acm.org/10.1145/2751205.2751213}, | ||
+ | doi = {10.1145/2751205.2751213}, | ||
+ | acmid = {2751213}, | ||
+ | publisher = {ACM}, | ||
+ | address = {New York, NY, USA}, | ||
+ | keywords = {compiler transformation, data affinity, gpgpu, program co-run, scheduling}, | ||
+ | } |