This shows you the differences between two versions of the page.
Next revision | Previous revision Next revision Both sides next revision | ||
melange:papers:spring2017 [2017/01/18 10:17] swetha created |
melange:papers:spring2017 [2018/02/07 16:16] prerana |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | @INPROCEEDINGS{6012857, | + | |
- | author={de O Sandes, E. F. and de Melo, A. C. M. A.}, | + | |
- | booktitle={Parallel Distributed Processing Symposium | + | |
- | title={Smith-Waterman Alignment of Huge Sequences with GPU in Linear Space}, | + | @Article{Griebl2000, |
+ | author=" | ||
+ | and Feautrier, Paul | ||
+ | and Lengauer, Christian", | ||
+ | title=" | ||
+ | journal=" | ||
+ | year=" | ||
+ | month=" | ||
+ | day=" | ||
+ | volume=" | ||
+ | number=" | ||
+ | pages=" | ||
+ | abstract=" | ||
+ | issn=" | ||
+ | doi=" | ||
+ | url=" | ||
+ | } | ||
+ | |||
+ | @inproceedings{Irigoin: | ||
+ | | ||
+ | title = {Supernode Partitioning}, | ||
+ | | ||
+ | | ||
+ | year = {1988}, | ||
+ | isbn = {0-89791-252-7}, | ||
+ | | ||
+ | pages = {319--329}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {73588}, | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @ARTICLE{7738524, | ||
+ | author={Y. H. Chen and T. Krishna and J. S. Emer and V. Sze}, | ||
+ | journal={IEEE Journal of Solid-State Circuits}, | ||
+ | title={Eyeriss: | ||
+ | year={2017}, | ||
+ | volume={52}, | ||
+ | number={1}, | ||
+ | pages={127-138}, | ||
+ | keywords={DRAM chips;data flow computing; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0018-9200}, | ||
+ | month={Jan}, | ||
+ | |||
+ | @ARTICLE{88484, | ||
+ | author={J. H. Saltz and R. Mirchandaney and K. Crowley}, | ||
+ | journal={IEEE Transactions on Computers}, | ||
+ | title={Run-time parallelization and scheduling of loops}, | ||
+ | year={1991}, | ||
+ | volume={40}, | ||
+ | number={5}, | ||
+ | pages={603-612}, | ||
+ | keywords={parallel programming; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0018-9340}, | ||
+ | month={May}, | ||
+ | |||
+ | BibTeX | EndNote | ACM Ref | ||
+ | @article{Feautrier: | ||
+ | | ||
+ | title = {Scalable and Structured Scheduling}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = oct, | ||
+ | year = {2006}, | ||
+ | issn = {0885-7458}, | ||
+ | pages = {459--487}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1007/ | ||
+ | acmid = {1217447}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{Verdoolaege: | ||
+ | | ||
+ | title = {Equivalence Checking of Static Affine Programs Using Widening to Handle Recurrences}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = nov, | ||
+ | year = {2012}, | ||
+ | issn = {0164-0925}, | ||
+ | pages = {11: | ||
+ | | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {2362390}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Kulkarni: | ||
+ | | ||
+ | title = {Optimistic Parallelism Requires Abstractions}, | ||
+ | | ||
+ | | ||
+ | year = {2007}, | ||
+ | isbn = {978-1-59593-633-2}, | ||
+ | | ||
+ | pages = {211--222}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {1250759}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{sanjay-fsttcs86, | ||
+ | Address = {New Delhi, India}, | ||
+ | Author = {Rajopadhye, | ||
+ | Booktitle = {Proceedings, | ||
+ | Technology and Theoretical Computer Science}, | ||
+ | Key = {Rajopadhye86b}, | ||
+ | Month = {December}, | ||
+ | Pages = {488-503}, | ||
+ | Publisher = {Springer Verlag, LNCS~241}, | ||
+ | Title = {On Synthesizing Systolic Arrays from Recurrence Equations | ||
+ | with Linear Dependencies}, | ||
+ | Year = {1986}} | ||
+ | |||
+ | @article{quinton-jvsp89, | ||
+ | Author = {Quinton, P. and {Van Dongen}, V.}, | ||
+ | Journal = {Journal of {VLSI} Signal Processing}, | ||
+ | Number = 2, | ||
+ | Pages = {95-113}, | ||
+ | Publisher = {Kluwer Academic Publishers, Boston}, | ||
+ | Title = {The Mapping of Linear Recurrence Equations on Regular Arrays}, | ||
+ | Volume = 1, | ||
+ | Year = 1989} | ||
+ | |||
+ | @Article{feautrier91, | ||
+ | author= | ||
+ | title= | ||
+ | journal= | ||
+ | year= | ||
+ | volume= | ||
+ | number= | ||
+ | pages= | ||
+ | month= | ||
+ | annote= | ||
+ | program (consisting only of assignments, | ||
+ | limits, and arrays with affine index expressions), | ||
+ | analyzed to find the flow dependencies." | ||
+ | } | ||
+ | |||
+ | @article{feautrier92a, | ||
+ | author | ||
+ | title = {Some Efficient Solutions to the Affine Scheduling Problem. | ||
+ | {Part I}. One-dimensional Time}, | ||
+ | journal | ||
+ | volume | ||
+ | number | ||
+ | year = {1992}, | ||
+ | pages = {313-347}, | ||
+ | ee = {http:// | ||
+ | } | ||
+ | |||
+ | @article{feautrier92b, | ||
+ | author | ||
+ | title = {Some Efficient Solutions to the Affine Scheduling Problem. | ||
+ | {Part II}. Multidimensional Time}, | ||
+ | journal | ||
+ | volume | ||
+ | number | ||
+ | year = {1992}, | ||
+ | pages = {389-420}, | ||
+ | ee = {http:// | ||
+ | } | ||
+ | |||
+ | @Article{collard-etal-fuzzy-jpdc-1997, | ||
+ | author = | ||
+ | title = {Fuzzy Array Data Flow Analysis}, | ||
+ | journal = {Journal of Parallel and Distributed Computing}, | ||
+ | year = | ||
+ | volume = 40, | ||
+ | number = 2, | ||
+ | pages = {210-226}, | ||
+ | month = {Feb}} | ||
+ | |||
+ | |||
+ | @InProceedings{BPCB10, | ||
+ | author = {Benabderrahmane, | ||
+ | Bastoul, C.}, | ||
+ | title = {The Polyhedral Model Is More Widely Applicable Than You | ||
+ | Think}, | ||
+ | | ||
+ | Construction | ||
+ | year = 2010, | ||
+ | series = {LNCS}, | ||
+ | address = | ||
+ | pages = | ||
+ | month = | ||
+ | publisher = | ||
+ | |||
+ | |||
+ | |||
+ | @ARTICLE{123, | ||
+ | and P. Sadayappan}, | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | @INPROCEEDINGS{6043234, | ||
+ | author={A. Pedram and A. Gerstlauer and R. A. v. d. Geijn}, | ||
+ | booktitle={ASAP | ||
+ | title={A high-performance, | ||
year={2011}, | year={2011}, | ||
- | month=may, | + | pages={35-42}, |
- | pages={1199-1211}, | + | keywords={floating point arithmetic; |
- | keywords={bioinformatics;cellular biophysics;coprocessors;parallel algorithms;GPU;GTX 285 Board;Myers-Miller algorithm;Smith-Waterman alignment;ancestral relationships;bioinformatics;cross-species chromosome alignments;high performance computing platform;linear space complexity;parallel algorithm;species peculiarity identification;Bioinformatics;Computer architecture; | + | doi={10.1109/ |
- | doi={10.1109/ | + | ISSN={1063-6862}, |
- | ISSN={1530-2075},} | + | month={Sept},} |
+ | |||
+ | |||
+ | |||
+ | @inproceedings{Bandishti: | ||
+ | | ||
+ | title = {Tiling Stencil Computations to Maximize Parallelism}, | ||
+ | | ||
+ | | ||
+ | year = {2012}, | ||
+ | isbn = {978-1-4673-0804-5}, | ||
+ | | ||
+ | pages = {40:1--40:11}, | ||
+ | | ||
+ | | ||
+ | url = {http:// | ||
+ | acmid = {2389051}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @ARTICLE{7582549, | ||
+ | author={U. Bondhugula and V. Bandishti and I. Pananilath}, | ||
+ | journal={IEEE Transactions on Parallel and Distributed Systems}, | ||
+ | title={Diamond Tiling: Tiling Techniques to Maximize Parallelism for Stencil Computations}, | ||
+ | year={2016}, | ||
+ | url={http:// | ||
+ | volume={PP}, | ||
+ | number={99}, | ||
+ | pages={1-1}, | ||
+ | keywords={Diamond;Face;Indexes;Optimization;Parallel processing;Shape;Silicon;Compilers;locality;loop tiling;parallelism; | ||
+ | doi={10.1109/ | ||
+ | ISSN={1045-9219}, | ||
+ | month={}, | ||
+ | |||
+ | @ARTICLE{7155440, | ||
+ | author={T. Nowatzki and J. Menon and C. H. Ho and K. Sankaralingam}, | ||
+ | journal={IEEE Micro}, | ||
+ | title={Architectural Simulators Considered Harmful}, | ||
+ | year={2015}, | ||
+ | url={http:// | ||
+ | volume={35}, | ||
+ | number={6}, | ||
+ | pages={4-12}, | ||
+ | keywords={computer architecture;digital simulation;architectural layers;architectural simulators;black boxes;evaluation standard recalibration; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0272-1732}, | ||
+ | month={Nov}, | ||
+ | |||
+ | @INPROCEEDINGS{7349585, | ||
+ | author={J. D. Garvey and T. S. Abdelrahman}, | ||
+ | booktitle={2015 44th International Conference on Parallel Processing}, | ||
+ | title={Automatic Performance Tuning of Stencil Computations on GPUs}, | ||
+ | year={2015}, | ||
+ | pages={300-309}, | ||
+ | keywords={graphics processing units; | ||
+ | doi={10.1109/ | ||
+ | ISSN={0190-3918}, | ||
+ | month={Sept}, | ||
+ | |||
+ | @article{DBLPSteve, | ||
+ | author | ||
+ | Cliff Woolley and | ||
+ | | ||
+ | | ||
+ | John Tran and | ||
+ | Bryan Catanzaro and | ||
+ | Evan Shelhamer}, | ||
+ | title = {cuDNN: Efficient Primitives for Deep Learning}, | ||
+ | journal | ||
+ | volume | ||
+ | year = {2014}, | ||
+ | url = {http:// | ||
+ | timestamp = {Sun, 02 Nov 2014 11:25:59 +0100}, | ||
+ | biburl | ||
+ | bibsource = {dblp computer science bibliography, | ||
+ | } | ||
+ | |||
+ | @article{Bao: | ||
+ | | ||
+ | title = {Static and Dynamic Frequency Scaling on Multicore CPUs}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = dec, | ||
+ | year = {2016}, | ||
+ | issn = {1544-3566}, | ||
+ | pages = {51: | ||
+ | | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {3011017}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Pouchet: | ||
+ | | ||
+ | title = {Polyhedral-based Data Reuse Optimization for Configurable Computing}, | ||
+ | | ||
+ | | ||
+ | year = {2013}, | ||
+ | isbn = {978-1-4503-1887-7}, | ||
+ | | ||
+ | pages = {29--38}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {2435273}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{Kong: | ||
+ | | ||
+ | title = {When Polyhedral Transformations Meet SIMD Code Generation}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | month = jun, | ||
+ | year = {2013}, | ||
+ | issn = {0362-1340}, | ||
+ | pages = {127--138}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {2462187}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{cummins2017synthesizing, | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | @article{optimistic2017, | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | | ||
+ | |||
+ | } | ||
+ | |||
+ | @inbook{e0cc7363fd684a529d1ba82b8195d530, | ||
+ | title = " | ||
+ | keywords | ||
+ | author | ||
+ | note = "Date of Acceptance: 25/ | ||
+ | year = " | ||
+ | month = " | ||
+ | booktitle = "The International Symposium on Code Generation and Optimization (CGO) 2017", | ||
+ | } | ||
+ | |||
+ | BibTeX | EndNote | ACM Ref | ||
+ | |||
+ | @inproceedings{Putnam: | ||
+ | | ||
+ | title = {A Reconfigurable Fabric for Accelerating Large-scale Datacenter Services}, | ||
+ | | ||
+ | | ||
+ | year = {2014}, | ||
+ | isbn = {978-1-4799-4394-4}, | ||
+ | | ||
+ | pages = {13--24}, | ||
+ | | ||
+ | url = {http:// | ||
+ | acmid = {2665678}, | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | [download] | ||
+ | |||
+ | @misc{accelerating-deep-convolutional-neural-networks-using-specialized-hardware, | ||
+ | author = {Kalin Ovtcharov and Olatunji Ruwase and Joo-Young Kim and Jeremy Fowers and Karin Strauss and Eric Chung}, | ||
+ | title = {Accelerating Deep Convolutional Neural Networks Using Specialized Hardware}, | ||
+ | booktitle = {}, | ||
+ | year = {2015}, | ||
+ | month = {February}, | ||
+ | abstract = { | ||
+ | |||
+ | We describe the design of a convolutional neural network accelerator running on a Stratix V FPGA. The design runs at three times the throughput of previous FPGA CNN accelerator designs. We show that the throughput/ | ||
+ | |||
+ | |||
+ | }, | ||
+ | publisher = {Microsoft Research}, | ||
+ | url = {https:// | ||
+ | address = {}, | ||
+ | pages = {}, | ||
+ | journal = {}, | ||
+ | volume = {}, | ||
+ | chapter = {}, | ||
+ | isbn = {}, | ||
+ | } | ||
+ | |||
+ | @inproceedings{Deitz: | ||
+ | | ||
+ | title = {Eliminating Redundancies in Sum-of-product Array Computations}, | ||
+ | | ||
+ | | ||
+ | year = {2001}, | ||
+ | isbn = {1-58113-410-X}, | ||
+ | | ||
+ | pages = {65--77}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {377807}, | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Basu: | ||
+ | | ||
+ | title = {Compiler-Directed Transformation for Higher-Order Stencils}, | ||
+ | | ||
+ | | ||
+ | year = {2015}, | ||
+ | isbn = {978-1-4799-8649-1}, | ||
+ | pages = {313--323}, | ||
+ | | ||
+ | url = {http://dx.doi.org/ | ||
+ | doi = {10.1109/ | ||
+ | acmid = {2863932}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @inproceedings{Putnam: | ||
+ | | ||
+ | title = {CHiMPS: A High-level Compilation Flow for Hybrid CPU-FPGA Architectures}, | ||
+ | | ||
+ | | ||
+ | year = {2008}, | ||
+ | isbn = {978-1-59593-934-0}, | ||
+ | | ||
+ | pages = {261--261}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {1344720}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | @inproceedings{Wong: | ||
+ | | ||
+ | title = {Comparing FPGA vs. Custom Cmos and the Impact on Processor Microarchitecture}, | ||
+ | | ||
+ | | ||
+ | year = {2011}, | ||
+ | isbn = {978-1-4503-0554-9}, | ||
+ | | ||
+ | pages = {5--14}, | ||
+ | | ||
+ | url = {http:// | ||
+ | doi = {10.1145/ | ||
+ | acmid = {1950419}, | ||
+ | | ||
+ | | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{DBLP: | ||
+ | author | ||
+ | | ||
+ | Ivo Danihelka and | ||
+ | Marc Lanctot and | ||
+ | Alex Graves}, | ||
+ | title = {Memory-Efficient Backpropagation Through Time}, | ||
+ | journal | ||
+ | volume | ||
+ | year = {2016}, | ||
+ | url = {http:// | ||
+ | timestamp = {Fri, 01 Jul 2016 17:39:49 +0200}, | ||
+ | biburl | ||
+ | bibsource = {dblp computer science bibliography, | ||
+ | } | ||
+ | |||
+ | @inproceedings{FlowMap1994, | ||
+ | | ||
+ | title = {FlowMap: an optimal technology mapping algorithm for delay optimization in lookup-table based FPGA designs}, | ||
+ | | ||
+ | year = {1994}, | ||
+ | issn = {1937-4151}, | ||
+ | pages = {1-12}, | ||
+ | url = {http:// | ||
+ | doi = {10.1109/ | ||
+ | | ||
+ | } | ||
+ | |||
+ | @article{MILROY20161589, | ||
+ | title = " | ||
+ | journal = " | ||
+ | volume = " | ||
+ | number = "", | ||
+ | pages = "1589 - 1600", | ||
+ | year = " | ||
+ | note = "", | ||
+ | issn = " | ||
+ | doi = " | ||
+ | url = " | ||
+ | author = " | ||
+ | keywords = " | ||
+ | keywords = "CESM Ensemble Consistency Test", | ||
+ | keywords = " | ||
+ | keywords = "code modification as source of variability", | ||
+ | keywords = " | ||
+ | keywords = " | ||
+ | keywords = " | ||
+ | keywords = "Fused Multiply-Add" | ||
+ | } |