Both sides previous revision
Previous revision
Next revision
|
Previous revision
Next revision
Both sides next revision
|
melange:papers:spring2018 [2018/02/09 12:18] prerana |
melange:papers:spring2018 [2018/02/21 11:36] prerana |
number={1}, | number={1}, |
pages={127-138}, | pages={127-138}, |
keywords={DRAM chips;data flow computing;energy conservation;feedforward neural nets;learning (artificial intelligence);neural net architecture;power aware computing;reconfigurable architectures;AI systems;AlexNet;CNN shapes;DRAM accesses;Eyeriss;MAC;RS dataflow reconfiguration;accelerator chip;convolutional layers;data movement energy cost;dataflow processing;deep convolutional neural networks;energy efficiency;energy-efficient reconfigurable accelerator;multiply and accumulation;off-chip DRAM;reconfiguring architecture;row stationary;spatial architecture;Clocks;Computer architecture;Hardware;Neural networks;Random access memory;Shape;Throughput;Convolutional neural networks (CNNs);dataflow processing;deep learning;energy-efficient accelerators;spatial architecture}, | keywords={DRAM chips;data flow computing;energy conservation;feedforward neural nets;learning (artificial intelligence);neural net architecture;power aware computing;reconfigurable architectures;AI systems;AlexNet;CNN shapes;DRAM accesses;Eyeriss;MAC;RS dataflow reconfiguration;accelerator chip;convolutional layers;data movement energy cost;dataflow processing;deep convolutional neural networks;energy efficiency;energy-efficient reconfigurable accelerator;multiply and accumulation;off-chip DRAM;reconfiguring architecture;row stationary;spatial architecture;Clocks;Computer architecture;Hardware;Neural networks;Random access memory;Shape;Throughput;Convolutional neural networks (CNNs);dataflow processing;deep learning;energy-efficient accelerators;spatial architecture}, |
| url = {http://ieeexplore.ieee.org/document/7738524/}, |
doi={10.1109/JSSC.2016.2616357}, | doi={10.1109/JSSC.2016.2616357}, |
ISSN={0018-9200}, | ISSN={0018-9200}, |
month={Jan},} | month={Jan},} |
| |
@ARTICLE{88484, | @inproceedings{Baxter:1989:RPS:72935.72967, |
author={J. H. Saltz and R. Mirchandaney and K. Crowley}, | author = {Baxter, D. and Mirchandaney, R. and Saltz, J. H.}, |
journal={IEEE Transactions on Computers}, | title = {Run-time Parallelization and Scheduling of Loops}, |
title={Run-time parallelization and scheduling of loops}, | booktitle = {Proceedings of the First Annual ACM Symposium on Parallel Algorithms and Architectures}, |
year={1991}, | series = {SPAA '89}, |
volume={40}, | year = {1989}, |
number={5}, | isbn = {0-89791-323-X}, |
pages={603-612}, | location = {Santa Fe, New Mexico, USA}, |
keywords={parallel programming;scheduling;Encore Multimax;automatic parallelization;automatic scheduling;compile-time information;concurrently executable loop iterations;do loop;execution time preprocessing;executors;inspector procedures;loop dependency analysis;loop indexes;run-time methods;run-time reordering;source code loop structures;symbolic transformation rules;transformed versions;wavefronts;Computer science;Costs;Failure analysis;Level set;NASA;Parallel processing;Performance analysis;Processor scheduling;Runtime}, | pages = {303--312}, |
doi={10.1109/12.88484}, | numpages = {10}, |
ISSN={0018-9340}, | url = {http://doi.acm.org/10.1145/72935.72967}, |
month={May},} | doi = {10.1145/72935.72967}, |
| acmid = {72967}, |
| publisher = {ACM}, |
| address = {New York, NY, USA}, |
| } |
| |
BibTeX | EndNote | ACM Ref | BibTeX | EndNote | ACM Ref |
} | } |
| |
@inproceedings{sanjay-fsttcs86, | @inproceedings{Rajopadhye:1986:SSA:646824.706926, |
Address = {New Delhi, India}, | author = {Rajopadhye, Sanjay V. and Purushothaman, S. and Fujimoto, Richard}, |
Author = {Rajopadhye, S. V. and Purushothaman, S. and Fujimoto, R. M.}, | title = {On Synthesizing Systolic Arrays from Recurrence Equations with Linear Dependencies}, |
Booktitle = {Proceedings, Sixth Conference on Foundations of Software | booktitle = {Proceedings of the Sixth Conference on Foundations of Software Technology and Theoretical Computer Science}, |
Technology and Theoretical Computer Science}, | year = {1986}, |
Key = {Rajopadhye86b}, | isbn = {3-540-17179-7}, |
Month = {December}, | pages = {488--503}, |
Pages = {488-503}, | numpages = {16}, |
Publisher = {Springer Verlag, LNCS~241}, | url = {http://dl.acm.org/citation.cfm?id=646824.706926}, |
Title = {On Synthesizing Systolic Arrays from Recurrence Equations | acmid = {706926}, |
with Linear Dependencies}, | publisher = {Springer-Verlag}, |
Year = {1986}} | address = {London, UK}, |
| } |
| |
@article{quinton-jvsp89, | @Article{Quinton1989, |
Author = {Quinton, P. and {Van Dongen}, V.}, | author="Quinton, Patrice |
Journal = {Journal of {VLSI} Signal Processing}, | and van Dongen, Vincent", |
Number = 2, | title="The mapping of linear recurrence equations on regular arrays", |
Pages = {95-113}, | journal="Journal of VLSI signal processing systems for signal, image and video technology", |
Publisher = {Kluwer Academic Publishers, Boston}, | year="1989", |
Title = {The Mapping of Linear Recurrence Equations on Regular Arrays}, | month="Oct", |
Volume = 1, | day="01", |
Year = 1989} | volume="1", |
| number="2", |
| pages="95--113", |
| abstract="The parallelization of many algorithms can be obtained using space-time transformations which are applied on nested do-loops or on recurrence equations. In this paper, we analyze systems of linear recurrence equations, a generalization of uniform recurrence equations. The first part of the paper describes a method for finding automatically whether such a system can be scheduled by an affine timing function, independent of the size parameter of the algorithm. In the second part, we describe a powerful method that makes it possible to transform linear recurrences into uniform recurrence equations. Both parts rely on results on integral convex polyhedra. Our results are illustrated on the Gauss elimination algorithm and on the Gauss-Jordan diagonalization algorithm.", |
| issn="0922-5773", |
| doi="10.1007/BF02477176", |
| url="https://doi.org/10.1007/BF02477176" |
| } |
| |
@Article{feautrier91, | @Article{feautrier91, |
pages= "23-53", | pages= "23-53", |
month= "Feb", | month= "Feb", |
| url= "http://www.cs.colostate.edu/~cs560/Spring2011/Notes/FeautrierEDFAijpp91.pdf", |
annote= "This article explains how a simple imperative language | annote= "This article explains how a simple imperative language |
program (consisting only of assignments, for loops with affine loop | program (consisting only of assignments, for loops with affine loop |
year = {1992}, | year = {1992}, |
pages = {313-347}, | pages = {313-347}, |
ee = {http://dx.doi.org/10.1007/BF01407835} | url = {http://dx.doi.org/10.1007/BF01407835} |
} | } |
| |
year = {1992}, | year = {1992}, |
pages = {389-420}, | pages = {389-420}, |
ee = {http://dx.doi.org/10.1007/BF01379404} | url = {http://dx.doi.org/10.1007/BF01379404} |
} | } |
| |
number = 2, | number = 2, |
pages = {210-226}, | pages = {210-226}, |
| url = {http://doi.acm.org/10.1145/209937.209947}, |
month = {Feb}} | month = {Feb}} |
| |
address = {Paphos, Cyprus}, | address = {Paphos, Cyprus}, |
pages = {283--303}, | pages = {283--303}, |
| url = {http://dx.doi.org/10.1007/978-3-642-11970-5_16}, |
month = Mar, | month = Mar, |
publisher = {Springer-Verlag}, | publisher = {Springer-Verlag}, |
| |
@ARTICLE{123, author={Sanket Tavarageri and Albert Hartono and Muthu Baskaran and Louis-Noel Pouchet and J. Ramanujam | @ARTICLE{123, author={Sanket Tavarageri and Albert Hartono and Muthu Baskaran and Louis-Noel Pouchet and J. Ramanujam |
and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, doi = {http://web.cse.ohio-state.edu/~pouchet.2/doc/cpc-article.10.pdf} ,} | and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, url = {http://web.cs.ucla.edu/~pouchet/doc/cpc-article.10.pdf}, doi = {http://web.cse.ohio-state.edu/~pouchet.2/doc/cpc-article.10.pdf} ,} |
| |
| |
year={2011}, | year={2011}, |
pages={35-42}, | pages={35-42}, |
keywords={floating point arithmetic;matrix multiplication;GFLOPS-W;application-specific custom hardware;floating point operations per second;linear algebra core;matrix computations;matrix-matrix multiplication;power consumption reduction;technology scaling;Bandwidth;Computer architecture;Hardware;Kernel;Linear algebra;Program processors;Registers}, | keywords={floating point arithmetic;matrix multiplication;GFLOPS-W;application-specific custom hardware;floating point operations per second;linear algebra core;matrix computations;matrix-matrix multiplication;power consumption reduction;technology scaling;Bandwidth;Computer architecture;Hardware;Kernel;Linear algebra;Program processors;Registers}, |
| url = {http://ieeexplore.ieee.org/document/6043234/}, |
doi={10.1109/ASAP.2011.6043234}, | doi={10.1109/ASAP.2011.6043234}, |
ISSN={1063-6862}, | ISSN={1063-6862}, |
pages={300-309}, | pages={300-309}, |
keywords={graphics processing units;learning (artificial intelligence);parallel processing;storage management;Nvidia GTX Titan GPU;OpenCL stencil kernel;automatic performance tuning;graphics processing unit;machine learning;optimization;random sampling;stencil computation;Graphics processing units;Instruction sets;Kernel;Merging;Optimization;Parallel processing;Yttrium;GPGPU;auto-tuning;machine learning;stencil}, | keywords={graphics processing units;learning (artificial intelligence);parallel processing;storage management;Nvidia GTX Titan GPU;OpenCL stencil kernel;automatic performance tuning;graphics processing unit;machine learning;optimization;random sampling;stencil computation;Graphics processing units;Instruction sets;Kernel;Merging;Optimization;Parallel processing;Yttrium;GPGPU;auto-tuning;machine learning;stencil}, |
doi={10.1109/ICPP.2015.39}, | doi={10.1109/ICPP.2015.39}, |
| url = {http://ieeexplore.ieee.org/document/7349585/}, |
ISSN={0190-3918}, | ISSN={0190-3918}, |
month={Sept},} | month={Sept},} |
| |
author={Doerfert, Johannes and Grosser, Tobias and Hack, Sebastian}, | author={Doerfert, Johannes and Grosser, Tobias and Hack, Sebastian}, |
| url = {http://dl.acm.org/citation.cfm?id=3049832.3049864}, |
year={2017} | year={2017} |
| |
} | } |
| |
@inbook{e0cc7363fd684a529d1ba82b8195d530, | @inproceedings{Ogilvie:2017:MCI:3049832.3049859, |
title = "Minimizing the cost of iterative compilation with active learning", | author = {Ogilvie, William F. and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh}, |
keywords = "Active Learning, Compilers, Iterative Compilation, Machine Learning, Sequential Analysis;", | title = {Minimizing the Cost of Iterative Compilation with Active Learning}, |
author = "William Ogilvie and Pavlos Petoumenos and Zheng Wang and Hugh Leather", | booktitle = {Proceedings of the 2017 International Symposium on Code Generation and Optimization}, |
note = "Date of Acceptance: 25/10/2016", | series = {CGO '17}, |
year = "2016", | year = {2017}, |
month = "10", | isbn = {978-1-5090-4931-8}, |
booktitle = "The International Symposium on Code Generation and Optimization (CGO) 2017", | location = {Austin, USA}, |
| pages = {245--256}, |
| numpages = {12}, |
| url = {http://dl.acm.org/citation.cfm?id=3049832.3049859}, |
| acmid = {3049859}, |
| publisher = {IEEE Press}, |
| address = {Piscataway, NJ, USA}, |
| keywords = {Active Learning, Compilers, Iterative Compilation, Machine Learning, Sequential Analysis}, |
} | } |
| |
year = {2015}, | year = {2015}, |
month = {February}, | month = {February}, |
| url = {https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/CNN20Whitepaper.pdf}, |
abstract = { | abstract = { |
| |