This shows you the differences between two versions of the page.
Next revision | Previous revision Next revision Both sides next revision | ||
melange:papers:spring2017 [2017/01/18 10:17] swetha created |
melange:papers:spring2017 [2017/04/17 17:10] prerana |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | @INPROCEEDINGS{6012857, | + | @ARTICLE{123, author={Sanket Tavarageri and Albert Hartono and Muthu Baskaran and Louis-Noel Pouchet and J. Ramanujam
- | author={de O Sandes, E. F. and de Melo, A. C. M. A.}, | + | and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, |
- | booktitle={Parallel Distributed Processing Symposium (IPDPS), | + | |
- | title={Smith-Waterman Alignment of Huge Sequences with GPU in Linear Space}, | + | |
+ | |||
+ | |||
+ | @INPROCEEDINGS{6043234, | ||
+ | author={A. Pedram and A. Gerstlauer and R. A. v. d. Geijn}, | ||
+ | booktitle={ASAP 2011 - 22nd IEEE International | ||
+ | title={A high-performance, | ||
year={2011}, | year={2011}, | ||
- | month=may, | + | pages={35--42}, | ||
- | pages={1199-1211}, | + | keywords={floating point arithmetic; |
- | keywords={bioinformatics;cellular biophysics;coprocessors;parallel algorithms;GPU;GTX 285 Board;Myers-Miller algorithm;Smith-Waterman alignment;ancestral relationships;bioinformatics;cross-species chromosome alignments;high performance computing platform;linear space complexity;parallel algorithm;species peculiarity identification;Bioinformatics;Computer architecture; | + | doi={10.1109/ |
- | doi={10.1109/ | + | ISSN={1063-6862}, |
- | ISSN={1530-2075},} | + | month={Sept},} |
+ | |||
+ | |||
+ | |||
+ | @inproceedings{Bandishti: | ||
+ | | ||
+ | title = {Tiling Stencil Computations to Maximize Parallelism}, | ||
+ | | ||
+ | | ||
+ | year = {2012}, | ||
+ | isbn = {978-1-4673-0804-5}, | ||
+ | | ||
+ | pages = {40:1--40:11}, | ||
+ | | ||
+ | | ||
+ | url = {http:// | ||
+ | acmid = {2389051}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @ARTICLE{7582549, | ||
+ | author={U. Bondhugula and V. Bandishti and I. Pananilath}, | ||
+ | journal={IEEE Transactions on Parallel and Distributed Systems}, | ||
+ | title={Diamond Tiling: Tiling Techniques to Maximize Parallelism for Stencil Computations}, | ||
+ | year={2016}, | ||
+ | url={http:// | ||
+ | volume={PP}, | ||
+ | number={99}, | ||
+ | pages={1-1}, | ||
+ | keywords={Diamond;Face;Indexes;Optimization;Parallel processing;Shape;Silicon;Compilers;locality;loop tiling;parallelism; | ||
+ | doi={10.1109/ | ||
+ | ISSN={1045-9219}, | ||
+ | month={},} | ||
+ | |||
+ | @ARTICLE{7155440, | ||
+ | author={T. Nowatzki and J. Menon and C. H. Ho and K. Sankaralingam}, | ||
+ | journal={IEEE Micro}, | ||
+ | title={Architectural Simulators Considered Harmful}, | ||
+ | year={2015}, | ||
+ | url={http:// | ||
+ | volume={35}, | ||
+ | number={6}, | ||
+ | pages={4--12}, | ||
+ | keywords={computer architecture;digital simulation;architectural layers;architectural simulators;black boxes;evaluation standard recalibration; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0272-1732}, | ||
+ | month={Nov},} | ||
+ | |||
+ | @INPROCEEDINGS{7349585, | ||
+ | author={J. D. Garvey and T. S. Abdelrahman}, | ||
+ | booktitle={2015 44th International Conference on Parallel Processing}, | ||
+ | title={Automatic Performance Tuning of Stencil Computations on GPUs}, | ||
+ | year={2015}, | ||
+ | pages={300--309}, | ||
+ | keywords={graphics processing units; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0190-3918}, | ||
+ | month={Sept},} | ||
+ | |||
+ | @article{DBLPSteve, | ||
+ | author | ||
+ | Cliff Woolley and | ||
+ | | ||
+ | | ||
+ | John Tran and | ||
+ | Bryan Catanzaro and | ||
+ | Evan Shelhamer}, | ||
+ | title = {cuDNN: Efficient Primitives for Deep Learning}, | ||
+ | journal | ||
+ | volume | ||
+ | year = {2014}, | ||
+ | url = {http:// | ||
+ | timestamp = {Sun, 02 Nov 2014 11:25:59 +0100}, | ||
+ | biburl | ||
+ | bibsource = {dblp computer science bibliography, | ||
+ | } | ||
+ | |||
+ | @article{Bao: | ||
+ | | ||
+ | title = {Static and Dynamic Frequency Scaling on Multicore CPUs}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = dec, | ||
+ | year = {2016}, | ||
+ | issn = {1544-3566}, | ||
+ | pages = {51: | ||
+ | | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {3011017}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Pouchet: | ||
+ | | ||
+ | title = {Polyhedral-based Data Reuse Optimization for Configurable Computing}, | ||
+ | | ||
+ | | ||
+ | year = {2013}, | ||
+ | isbn = {978-1-4503-1887-7}, | ||
+ | | ||
+ | pages = {29--38}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {2435273}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{Kong: | ||
+ | | ||
+ | title = {When Polyhedral Transformations Meet SIMD Code Generation}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = jun, | ||
+ | year = {2013}, | ||
+ | issn = {0362-1340}, | ||
+ | pages = {127--138}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {2462187}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{cummins2017synthesizing, | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | @article{optimistic2017, | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | @inproceedings{e0cc7363fd684a529d1ba82b8195d530, | ||
+ | title = " | ||
+ | keywords | ||
+ | author | ||
+ | note = "Date of Acceptance: 25/ | ||
+ | year = " | ||
+ | month = " | ||
+ | booktitle = "The International Symposium on Code Generation and Optimization (CGO) 2017", | ||
+ | } | ||
+ | |||
+ | BibTeX | EndNote | ACM Ref | ||
+ | |||
+ | @inproceedings{Putnam: | ||
+ | | ||
+ | title = {A Reconfigurable Fabric for Accelerating Large-scale Datacenter Services}, | ||
+ | | ||
+ | | ||
+ | year = {2014}, | ||
+ | isbn = {978-1-4799-4394-4}, | ||
+ | | ||
+ | pages = {13--24}, | ||
+ | | ||
+ | url = {http:// | ||
+ | acmid = {2665678}, | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | [download] | ||
+ | |||
+ | @misc{accelerating-deep-convolutional-neural-networks-using-specialized-hardware, | ||
+ | author = {Kalin Ovtcharov and Olatunji Ruwase and Joo-Young Kim and Jeremy Fowers and Karin Strauss and Eric Chung}, | ||
+ | title = {Accelerating Deep Convolutional Neural Networks Using Specialized Hardware}, | ||
+ | booktitle = {}, | ||
+ | year = {2015}, | ||
+ | month = feb, | ||
+ | abstract = { | ||
+ | |||
+ | We describe the design of a convolutional neural network accelerator running on a Stratix V FPGA. The design runs at three times the throughput of previous FPGA CNN accelerator designs. We show that the throughput/ | ||
+ | |||
+ | |||
+ | }, | ||
+ | publisher = {Microsoft Research}, | ||
+ | url = {https:// | ||
+ | address = {}, | ||
+ | pages = {}, | ||
+ | journal = {}, | ||
+ | volume = {}, | ||
+ | chapter = {}, | ||
+ | isbn = {}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Deitz: | ||
+ | | ||
+ | title = {Eliminating Redundancies in Sum-of-product Array Computations}, | ||
+ | | ||
+ | | ||
+ | year = {2001}, | ||
+ | isbn = {1-58113-410-X}, | ||
+ | | ||
+ | pages = {65--77}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {377807}, | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Basu: | ||
+ | | ||
+ | title = {Compiler-Directed Transformation for Higher-Order Stencils}, | ||
+ | | ||
+ | | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4799-8649-1}, | ||
+ | pages = {313--323}, | ||
+ | | ||
+ | url = {http://dx.doi.org/ | ||
+ | doi = {10.1109/ | ||
+ | acmid = {2863932}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Putnam: | ||
+ | | ||
+ | title = {CHiMPS: A High-level Compilation Flow for Hybrid CPU-FPGA Architectures}, | ||
+ | | ||
+ | | ||
+ | year = {2008}, | ||
+ | isbn = {978-1-59593-934-0}, | ||
+ | | ||
+ | pages = {261--261}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {1344720}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | @inproceedings{Wong: | ||
+ | | ||
+ | title = {Comparing FPGA vs. Custom Cmos and the Impact on Processor Microarchitecture}, | ||
+ | | ||
+ | | ||
+ | year = {2011}, | ||
+ | isbn = {978-1-4503-0554-9}, | ||
+ | | ||
+ | pages = {5--14}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {1950419}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{DBLP: | ||
+ | author | ||
+ | | ||
+ | Ivo Danihelka and | ||
+ | Marc Lanctot and | ||
+ | Alex Graves}, | ||
+ | title = {Memory-Efficient Backpropagation Through Time}, | ||
+ | journal | ||
+ | volume | ||
+ | year = {2016}, | ||
+ | url = {http:// | ||
+ | timestamp = {Fri, 01 Jul 2016 17:39:49 +0200}, | ||
+ | biburl | ||
+ | bibsource = {dblp computer science bibliography, | ||
+ | } | ||
+ | |||
+ | @inproceedings{FlowMap1994, | ||
+ | | ||
+ | title = {FlowMap: an optimal technology mapping algorithm for delay optimization in lookup-table based FPGA designs}, | ||
+ | | ||
+ | year = {1994}, | ||
+ | issn = {1937-4151}, | ||
+ | pages = {1--12}, | ||
+ | url = {http:// | ||
+ | doi = {10.1109/ | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{MILROY20161589, | ||
+ | title = " | ||
+ | journal = " | ||
+ | volume = " | ||
+ | number = "", | ||
+ | pages = "1589--1600", | ||
+ | year = " | ||
+ | note = "", | ||
+ | issn = " | ||
+ | doi = " | ||
+ | url = " | ||
+ | author = " | ||
+ | keywords = " | ||
+ | keywords = "CESM Ensemble Consistency Test", | ||
+ | keywords = " | ||
+ | keywords = "code modification as source of variability", | ||
+ | keywords = " | ||
+ | keywords = " | ||
+ | keywords = " | ||
+ | keywords = "Fused Multiply-Add" | ||
+ | } |