Both sides previous revision
Previous revision
Next revision
|
Previous revision
|
melange:papers:summer2015 [2015/07/02 14:46] waruna |
melange:papers:summer2015 [2015/07/02 16:30] (current) waruna |
keywords = {automatic parallelization, energy consumption, hierarchical tiling, o-chip memory access, polyhedral model}, | keywords = {automatic parallelization, energy consumption, hierarchical tiling, o-chip memory access, polyhedral model}, |
} | } |
| |
| % FPT 2013 conference paper; the citation key equals the final segment of the DOI. |
| % The acronym FSM is brace-protected so sentence-casing styles keep it uppercase. |
| @inproceedings{6718367, |
| author = {Yuki, T. and Morvan, A. and Derrien, S.}, |
| title = {Derivation of efficient {FSM} from loop nests}, |
| booktitle = {Field-Programmable Technology (FPT), 2013 International Conference on}, |
| year = {2013}, |
| month = dec, |
| pages = {286--293}, |
| keywords = {finite state machines;pipeline processing;HLS;efficient FSM;efficient finite state machine;hardware utilization rate;high level synthesis tools;nested loop pipelining;Automata;Complexity theory;Degradation;Hardware;Optimization;Pipeline processing;Vectors}, |
| doi = {10.1109/FPT.2013.6718367}, |
| } |
| |
| % Yuki, X10 2015 workshop paper on loop transformations expressed with X10 clocks. |
| % The language name X10 is capitalised and brace-protected so styles cannot lowercase it. |
| @inproceedings{Yuki:2015:RLT:2771774.2771778, |
| author = {Yuki, Tomofumi}, |
| title = {Revisiting Loop Transformations with {X10} Clocks}, |
| booktitle = {Proceedings of the ACM SIGPLAN Workshop on X10}, |
| series = {X10 2015}, |
| year = {2015}, |
| isbn = {978-1-4503-3586-7}, |
| location = {Portland, OR, USA}, |
| pages = {1--6}, |
| numpages = {6}, |
| url = {http://doi.acm.org/10.1145/2771774.2771778}, |
| doi = {10.1145/2771774.2771778}, |
| acmid = {2771778}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {X10, affine schedule, clocks, loop transformation, parallel programming, synchronization, unimodular framework}, |
| } |
| |
@inproceedings{Elango:2014:CDM:2612669.2612694, | @inproceedings{Elango:2014:CDM:2612669.2612694, |
address = {New York, NY, USA}, | address = {New York, NY, USA}, |
keywords = {GPUs, Verification, barrier recycling, data races, deadlock, named barriers, synchronization, warp specialization}, | keywords = {GPUs, Verification, barrier recycling, data races, deadlock, named barriers, synchronization, warp specialization}, |
| } |
| |
| % Stengel, Treibig, Hager, Wellein (ICS '15): stencil performance bottlenecks analysed with the Execution-Cache-Memory model. |
| @inproceedings{Stengel:2015:QPB:2751205.2751240, |
|   author    = {Stengel, Holger and Treibig, Jan and Hager, Georg and Wellein, Gerhard}, |
|   title     = {Quantifying Performance Bottlenecks of Stencil Computations Using the Execution-Cache-Memory Model}, |
|   booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
|   series    = {ICS '15}, |
|   year      = {2015}, |
|   pages     = {207--216}, |
|   numpages  = {10}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Newport Beach, California, USA}, |
|   isbn      = {978-1-4503-3559-1}, |
|   doi       = {10.1145/2751205.2751240}, |
|   url       = {http://doi.acm.org/10.1145/2751205.2751240}, |
|   acmid     = {2751240}, |
|   keywords  = {multicore, optimization, performance model, stencils}, |
| } |
| |
| % Mehta and Yew (PLDI 2015): compiler scalability for optimizing large programs. |
| @inproceedings{Mehta:2015:ICS:2737924.2737954, |
|   author    = {Mehta, Sanyam and Yew, Pen-Chung}, |
|   title     = {Improving Compiler Scalability: Optimizing Large Programs at Small Price}, |
|   booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
|   series    = {PLDI 2015}, |
|   year      = {2015}, |
|   pages     = {143--152}, |
|   numpages  = {10}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Portland, OR, USA}, |
|   isbn      = {978-1-4503-3468-6}, |
|   doi       = {10.1145/2737924.2737954}, |
|   url       = {http://doi.acm.org/10.1145/2737924.2737954}, |
|   acmid     = {2737954}, |
|   keywords  = {Compiler scalability, O-molecule, optimization, polyhedral model, statement condensation}, |
| } |
| |
| % Ding, Tang, Kandemir, Zhang, Kultursay (PLDI 2015): optimizing off-chip accesses in multicores. |
| @inproceedings{Ding:2015:OOA:2737924.2737989, |
|   author    = {Ding, Wei and Tang, Xulong and Kandemir, Mahmut and Zhang, Yuanrui and Kultursay, Emre}, |
|   title     = {Optimizing Off-chip Accesses in Multicores}, |
|   booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
|   series    = {PLDI 2015}, |
|   year      = {2015}, |
|   pages     = {131--142}, |
|   numpages  = {12}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Portland, OR, USA}, |
|   isbn      = {978-1-4503-3468-6}, |
|   doi       = {10.1145/2737924.2737989}, |
|   url       = {http://doi.acm.org/10.1145/2737924.2737989}, |
|   acmid     = {2737989}, |
|   keywords  = {Manycores, memory controller, off-chip accesses localization}, |
| } |
| |
| % Mendis et al. (PLDI 2015): lifting stencil kernels from stripped x86 binaries to Halide code. |
| % Halide and DSL are brace-protected so sentence-casing styles keep their capitalisation. |
| @inproceedings{Mendis:2015:HLH:2737924.2737974, |
| author = {Mendis, Charith and Bosboom, Jeffrey and Wu, Kevin and Kamil, Shoaib and Ragan-Kelley, Jonathan and Paris, Sylvain and Zhao, Qin and Amarasinghe, Saman}, |
| title = {Helium: Lifting High-performance Stencil Kernels from Stripped x86 Binaries to {Halide} {DSL} Code}, |
| booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
| series = {PLDI 2015}, |
| year = {2015}, |
| isbn = {978-1-4503-3468-6}, |
| location = {Portland, OR, USA}, |
| pages = {391--402}, |
| numpages = {12}, |
| url = {http://doi.acm.org/10.1145/2737924.2737974}, |
| doi = {10.1145/2737924.2737974}, |
| acmid = {2737974}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {Helium, autotuning, dynamic analysis, image processing, reverse engineering, stencil computation, x86 binary instrumentation}, |
| } |
| |
| % Sivaramakrishnan, Kaki, Jagannathan (PLDI 2015): declarative programming over eventually consistent data stores. |
| @inproceedings{Sivaramakrishnan:2015:DPO:2737924.2737981, |
|   author    = {Sivaramakrishnan, KC and Kaki, Gowtham and Jagannathan, Suresh}, |
|   title     = {Declarative Programming over Eventually Consistent Data Stores}, |
|   booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
|   series    = {PLDI 2015}, |
|   year      = {2015}, |
|   pages     = {413--424}, |
|   numpages  = {12}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Portland, OR, USA}, |
|   isbn      = {978-1-4503-3468-6}, |
|   doi       = {10.1145/2737924.2737981}, |
|   url       = {http://doi.acm.org/10.1145/2737924.2737981}, |
|   acmid     = {2737981}, |
|   keywords  = {Availability, Axiomatic Contracts, CRDTs, Cassandra, Contract Classification, Decidable Logic, Distributed Transactions, Eventual Consistency, Haskell, Quelea, SMT solvers}, |
| } |
| |
| % Venkat, Hall, Strout (PLDI 2015): loop and data transformations for sparse matrix code. |
| @inproceedings{Venkat:2015:LDT:2737924.2738003, |
|   author    = {Venkat, Anand and Hall, Mary and Strout, Michelle}, |
|   title     = {Loop and Data Transformations for Sparse Matrix Code}, |
|   booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, |
|   series    = {PLDI 2015}, |
|   year      = {2015}, |
|   pages     = {521--532}, |
|   numpages  = {12}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Portland, OR, USA}, |
|   isbn      = {978-1-4503-3468-6}, |
|   doi       = {10.1145/2737924.2738003}, |
|   url       = {http://doi.acm.org/10.1145/2737924.2738003}, |
|   acmid     = {2738003}, |
|   keywords  = {inspector/executor, loop transformations, non-affine, polyhedral model, sparse matrices}, |
| } |
| |
| |
| % Shen, Liu, Zhang, Bhamidipati: IJPP 41(6), 2013, on input-sensitivity of GPU program optimizations. |
| % GPU is brace-protected in the title; the page range uses an en-dash (double hyphen). |
| @article{Xipeng2013, |
| author={Shen, Xipeng and Liu, Yixun and Zhang, Eddy Z. and Bhamidipati, Poornima}, |
| title={An Infrastructure for Tackling Input-Sensitivity of {GPU} Program Optimizations}, |
| journal={International Journal of Parallel Programming}, |
| year={2013}, |
| volume={41}, |
| number={6}, |
| pages={855--869}, |
| issn={0885-7458}, |
| doi={10.1007/s10766-012-0236-3}, |
| url={http://dx.doi.org/10.1007/s10766-012-0236-3}, |
| publisher={Springer US}, |
| keywords={GPU; Program Optimizations; Empirical Search; CUDA; G-ADAPT; Cross-input Adaptation}, |
| language={English} |
| } |
| |
| % Bertolacci et al. (ICS '15): parameterized diamond tiling for stencils using Chapel parallel iterators. |
| % The language name Chapel is brace-protected so sentence-casing styles keep it capitalised. |
| @inproceedings{Bertolacci:2015:PDT:2751205.2751226, |
| author = {Bertolacci, Ian J. and Olschanowsky, Catherine and Harshbarger, Ben and Chamberlain, Bradford L. and Wonnacott, David G. and Strout, Michelle Mills}, |
| title = {Parameterized Diamond Tiling for Stencil Computations with {Chapel} Parallel Iterators}, |
| booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
| series = {ICS '15}, |
| year = {2015}, |
| isbn = {978-1-4503-3559-1}, |
| location = {Newport Beach, California, USA}, |
| pages = {197--206}, |
| numpages = {10}, |
| url = {http://doi.acm.org/10.1145/2751205.2751226}, |
| doi = {10.1145/2751205.2751226}, |
| acmid = {2751226}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {chapel, diamond tiling, parallel iterators, separation of concerns, stencil computations}, |
| } |
| |
| |
| % Wu, Chen, Li, Shen, Vetter (ICS '15): flexible GPU task assignment via SM-centric program transformations. |
| % The acronyms GPU and SM are brace-protected so sentence-casing styles keep them uppercase. |
| @inproceedings{Wu:2015:EEF:2751205.2751213, |
| author = {Wu, Bo and Chen, Guoyang and Li, Dong and Shen, Xipeng and Vetter, Jeffrey}, |
| title = {Enabling and Exploiting Flexible Task Assignment on {GPU} Through {SM}-Centric Program Transformations}, |
| booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
| series = {ICS '15}, |
| year = {2015}, |
| isbn = {978-1-4503-3559-1}, |
| location = {Newport Beach, California, USA}, |
| pages = {119--130}, |
| numpages = {12}, |
| url = {http://doi.acm.org/10.1145/2751205.2751213}, |
| doi = {10.1145/2751205.2751213}, |
| acmid = {2751213}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {compiler transformation, data affinity, gpgpu, program co-run, scheduling}, |
| } |
| |
| % Aga, Singh, Narayanasamy (ICS '15): zFENCE, data-less coherence for efficient fences. |
| % The mixed-case name zFENCE is brace-protected; sentence-casing styles would otherwise lowercase it. |
| @inproceedings{Aga:2015:ZDC:2751205.2751211, |
| author = {Aga, Shaizeen and Singh, Abhayendra and Narayanasamy, Satish}, |
| title = {{zFENCE}: Data-less Coherence for Efficient Fences}, |
| booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
| series = {ICS '15}, |
| year = {2015}, |
| isbn = {978-1-4503-3559-1}, |
| location = {Newport Beach, California, USA}, |
| pages = {295--305}, |
| numpages = {11}, |
| url = {http://doi.acm.org/10.1145/2751205.2751211}, |
| doi = {10.1145/2751205.2751211}, |
| acmid = {2751211}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {data less coherence, fences, memory consistency, parallel programming, sequential consistency}, |
| } |
| |
| % Grosser et al. (ICS '15): optimistic delinearization of parametrically sized arrays. |
| % NOTE(review): the exported author list named Sebastian Pop twice; the second-position author is |
| % restored here as J. Ramanujam. Confirm against the publisher record for the DOI below. |
| @inproceedings{Grosser:2015:ODP:2751205.2751248, |
| author = {Grosser, Tobias and Ramanujam, J. and Pouchet, Louis-Noel and Sadayappan, P. and Pop, Sebastian}, |
| title = {Optimistic Delinearization of Parametrically Sized Arrays}, |
| booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
| series = {ICS '15}, |
| year = {2015}, |
| isbn = {978-1-4503-3559-1}, |
| location = {Newport Beach, California, USA}, |
| pages = {351--360}, |
| numpages = {10}, |
| url = {http://doi.acm.org/10.1145/2751205.2751248}, |
| doi = {10.1145/2751205.2751248}, |
| acmid = {2751248}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| keywords = {linear memory layout, multi-dimensional arrays, polyhedral analysis}, |
| } |
| |
| % Zandifar et al. (ICS '15): composing algorithmic skeletons for high-performance scientific applications. |
| @inproceedings{Zandifar:2015:CAS:2751205.2751241, |
|   author    = {Zandifar, Mani and Abdul Jabbar, Mustafa and Majidi, Alireza and Keyes, David and Amato, Nancy M. and Rauchwerger, Lawrence}, |
|   title     = {Composing Algorithmic Skeletons to Express High-Performance Scientific Applications}, |
|   booktitle = {Proceedings of the 29th ACM on International Conference on Supercomputing}, |
|   series    = {ICS '15}, |
|   year      = {2015}, |
|   pages     = {415--424}, |
|   numpages  = {10}, |
|   publisher = {ACM}, |
|   address   = {New York, NY, USA}, |
|   location  = {Newport Beach, California, USA}, |
|   isbn      = {978-1-4503-3559-1}, |
|   doi       = {10.1145/2751205.2751241}, |
|   url       = {http://doi.acm.org/10.1145/2751205.2751241}, |
|   acmid     = {2751241}, |
|   keywords  = {algorithmic skeletons, data flow programming, distributed systems, high-performance computing, patterns}, |
} | } |