User Tools

Site Tools


melange:papers:spring2018

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
melange:papers:spring2018 [2018/02/21 11:03]
prerana
melange:papers:spring2018 [2018/04/18 10:30] (current)
prerana
Line 1: Line 1:
 @article{Bielecki:2016:TAN:3060371.3060383, @article{Bielecki:2016:TAN:3060371.3060383,
- author = {Bielecki, W\lodzimierz and Pa\lkowski, Marek},+ author = {Bielecki, Wlodzimierz and Pa\lkowski, Marek},
  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},
  journal = {Int. J. Appl. Math. Comput. Sci.},  journal = {Int. J. Appl. Math. Comput. Sci.},
Line 48: Line 48:
 number="6", number="6",
 pages="607--631", pages="607--631",
-abstract="There are many algorithms for the space-time mapping of nested loops. Some of them even make the optimal choices within their framework. We propose a preprocessing phase for algorithms in the polytope model, which extends the model and yields space-time mappings whose schedule is, in some cases, orders of magnitude faster. These are cases in which the dependence graph has small irregularities. The basic idea is to split the index set of the loop nests into parts with a regular dependence structure and apply the existing space-time mapping algorithms to these parts individually. This work is based on a seminal idea in the more limited context of loop parallelization at the code level. We elevate the idea to the model level (our model is the polytope model), which increases its applicability by providing a clearer and wider range of choices at an acceptable analysis cost. Index set splitting is one facet in the effort to extend the power of the polytope model and to enable the generation of competitive target code.", 
-issn="1573-7640", 
 doi="10.1023/A:1007516818651", doi="10.1023/A:1007516818651",
 url="https://doi.org/10.1023/A:1007516818651" url="https://doi.org/10.1023/A:1007516818651"
Line 79: Line 77:
 number={1},  number={1}, 
 pages={127-138},  pages={127-138}, 
-keywords={DRAM chips;data flow computing;energy conservation;feedforward neural nets;learning (artificial intelligence);neural net architecture;power aware computing;reconfigurable architectures;AI systems;AlexNet;CNN shapes;DRAM accesses;Eyeriss;MAC;RS dataflow reconfiguration;accelerator chip;convolutional layers;data movement energy cost;dataflow processing;deep convolutional neural networks;energy efficiency;energy-efficient reconfigurable accelerator;multiply and accumulation;off-chip DRAM;reconfiguring architecture;row stationary;spatial architecture;Clocks;Computer architecture;Hardware;Neural networks;Random access memory;Shape;Throughput;Convolutional neural networks (CNNs);dataflow processing;deep learning;energy-efficient accelerators;spatial architecture}, +url = {http://ieeexplore.ieee.org/document/7738524/}, 
 doi={10.1109/JSSC.2016.2616357},  doi={10.1109/JSSC.2016.2616357}, 
 ISSN={0018-9200},  ISSN={0018-9200}, 
 month={Jan},} month={Jan},}
  
-@ARTICLE{88484,  +@inproceedings{Baxter:1989:RPS:72935.72967,
-author={J. H. Saltz and R. Mirchandaney and K. Crowley},  + author = {Baxter, D. and Mirchandaney, R. and Saltz, J. H.}, 
-journal={IEEE Transactions on Computers},  + title = {Run-time Parallelization and Scheduling of Loops}, 
-title={Run-time parallelization and scheduling of loops},  + booktitle = {Proceedings of the First Annual ACM Symposium on Parallel Algorithms and Architectures}, 
-year={1991},  + series = {SPAA '89}, 
-volume={40},  + year = {1989}, 
-number={5},  + isbn = {0-89791-323-X}, 
-pages={603-612},  + location = {Santa Fe, New Mexico, USA}, 
-keywords={parallel programming;scheduling;Encore Multimax;automatic parallelization;automatic scheduling;compile-time information;concurrently executable loop iterations;do loop;execution time preprocessing;executors;inspector procedures;loop dependency analysis;loop indexes;run-time methods;run-time reordering;source code loop structures;symbolic transformation rules;transformed versions;wavefronts;Computer science;Costs;Failure analysis;Level set;NASA;Parallel processing;Performance analysis;Processor scheduling;Runtime},  + pages = {303--312}, 
-doi={10.1109/12.88484},  + numpages = {10}, 
-ISSN={0018-9340},  + url = {http://doi.acm.org/10.1145/72935.72967}, 
-month={May},}+ doi = {10.1145/72935.72967}, 
 + acmid = {72967}, 
 + publisher = {ACM}, 
 + address = {New York, NY, USA}, 
 +}
  
-BibTeX | EndNote | ACM Ref 
 @article{Feautrier:2006:SSS:1217445.1217447, @article{Feautrier:2006:SSS:1217445.1217447,
  author = {Feautrier, Paul},  author = {Feautrier, Paul},
Line 110: Line 111:
  pages = {459--487},  pages = {459--487},
  numpages = {29},  numpages = {29},
- url = {http://dx.doi.org/10.1007/s10766-006-0011-4},+ url = {https://link.springer.com/content/pdf/10.1007%2Fs10766-006-0011-4.pdf},
  doi = {10.1007/s10766-006-0011-4},  doi = {10.1007/s10766-006-0011-4},
  acmid = {1217447},  acmid = {1217447},
Line 157: Line 158:
 } }
  
-@inproceedings{sanjay-fsttcs86, +@inproceedings{Rajopadhye:1986:SSA:646824.706926,
-        Address = {New Delhi, India},+ author = {Rajopadhye, Sanjay V. and Purushothaman, S. and Fujimoto, Richard}, 
-        Author = {Rajopadhye, S. V. and Purushothaman, S. and Fujimoto, R. M.}, + title = {On Synthesizing Systolic Arrays from Recurrence Equations with Linear Dependencies}, 
-        Booktitle = {Proceedings, Sixth Conference on Foundations of Software + booktitle = {Proceedings of the Sixth Conference on Foundations of Software Technology and Theoretical Computer Science}, 
-                  Technology and Theoretical Computer Science}, + year = {1986}, 
-        Key = {Rajopadhye86b}, + isbn = {3-540-17179-7}, 
-        Month = {December}, + pages = {488--503}, 
-        Pages = {488-503}, + numpages = {16}, 
-        Publisher = {Springer Verlag, LNCS~241}, + url = {https://www.cs.colostate.edu/%7Ecs560/Fall2015/Lectures/Sanjay1986.pdf}, 
-        Title = {On Synthesizing Systolic Arrays from Recurrence Equations + acmid = {706926}, 
-                  with Linear Dependencies}, + publisher = {Springer-Verlag}, 
-        Year = {1986}}+ address = {London, UK, UK}
 +}
  
-@article{quinton-jvsp89,+@Article{Quinton1989,
-        Author = {Quinton, P. and {Van Dongen}, V.},+author="Quinton, Patrice 
-        Journal = {Journal of {VLSI} Signal Processing},+and van Dongen, Vincent",
-        Number = 2, +title="The mapping of linear recurrence equations on regular arrays", 
-        Pages = {95-113},+journal="Journal of VLSI signal processing systems for signal, image and video technology", 
-        Publisher = {Kluwer Academic Publishers, Boston},+year="1989", 
-        Title = {The Mapping of Linear Recurrence Equations on Regular Arrays},+month="Oct", 
-        Volume = 1, +day="01", 
-        Year = 1989}+volume="1",
 +number="2",
 +pages="95--113",
 +issn="0922-5773",
 +doi="10.1007/BF02477176",
 +url="https://doi.org/10.1007/BF02477176" 
 +}
  
 @Article{feautrier91, @Article{feautrier91,
-author=         "Feautrier, P.",+author= "Feautrier, P.",
 title=          "Dataflow analysis of array and scalar references", title=          "Dataflow analysis of array and scalar references",
 journal=        "International Journal of Parallel Programming", journal=        "International Journal of Parallel Programming",
Line 189: Line 197:
 pages=          "23-53", pages=          "23-53",
 month=          "Feb", month=          "Feb",
-annote =      "This article explains how a simple imperative language +url =         "http://www.cs.colostate.edu/~cs560/Spring2011/Notes/FeautrierEDFAijpp91.pdf"
-        program (consisting only of assignments, for loops with affine loop +
-        limits, and arrays with affine index expressions), can be statically +
-        analyzed to find the flow dependencies."+
 } }
  
 @article{feautrier92a, @article{feautrier92a,
-  author    = {Paul Feautrier}, +author    = {Paul Feautrier}, 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part I}.  One-dimensional Time}, 
-              {Part I}.  One-dimensional Time}, +journal   = {International Journal of Parallel Programming}, 
-  journal   = {International Journal of Parallel Programming}, +volume    = {21}, 
-  volume    = {21}, +number    = {5}, 
-  number    = {5}, +year      = {1992}, 
-  year      = {1992}, +pages     = {313-347}, 
-  pages     = {313-347}, +url        = {http://dx.doi.org/10.1007/BF01407835}
-  ee        = {http://dx.doi.org/10.1007/BF01407835}+
 } }
  
 @article{feautrier92b, @article{feautrier92b,
-  author    = {Paul Feautrier}, +author    = {Paul Feautrier}, 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part II}. Multidimensional Time}, 
-               {Part II}. Multidimensional Time}, +journal   = {International Journal of Parallel Programming}, 
-  journal   = {International Journal of Parallel Programming}, +volume    = {21}, 
-  volume    = {21}, +number    = {6}, 
-  number    = {6}, +year      = {1992}, 
-  year      = {1992}, +pages     = {389-420}, 
-  pages     = {389-420}, +url        = {http://dx.doi.org/10.1007/BF01379404}
-  ee        = {http://dx.doi.org/10.1007/BF01379404}+
 } }
  
 @Article{collard-etal-fuzzy-jpdc-1997, @Article{collard-etal-fuzzy-jpdc-1997,
-  author =       {Collard, J-F. and Barthou, D. and Feautrier, P.}, +author =       {Collard, J-F. and Barthou, D. and Feautrier, P.}, 
-  title =        {Fuzzy Array Data Flow Analysis}, +title =        {Fuzzy Array Data Flow Analysis}, 
-  journal =      {Journal of Parallel and Distributed Computing}, +journal =      {Journal of Parallel and Distributed Computing}, 
-  year =         1997, +year =         1997, 
-  volume =       40, +volume =       40, 
-  number =       2, +number =       2, 
-  pages =        {210-226}, +pages =        {210-226}, 
-  month =        {Feb}}+url =          {https://www.sciencedirect.com/science/article/pii/S0743731596912617}, 
 +month =        {Feb}}
  
  
 @InProceedings{BPCB10, @InProceedings{BPCB10,
-  author =        {Benabderrahmane, M.-W. and Pouchet, L.-N. and Cohen A. and +author =        {Benabderrahmane, M.-W. and Pouchet, L.-N. and Cohen A. and Bastoul, C.}, 
-                  Bastoul, C.}, +title =         {The Polyhedral Model Is More Widely Applicable Than You Think}, 
-  title =         {The Polyhedral Model Is More Widely Applicable Than You +booktitle =     {Proceedings of the International Conference on Compiler Construction ({ETAPS CC'10})}, 
-                  Think}, +year =          2010, 
-  booktitle =     {Proceedings of the International Conference on Compiler +series =        {LNCS}, 
-                  Construction ({ETAPS CC'10})}, +address =       {Paphos, Cyprus}, 
-  year =          2010, +pages =         {283--303}, 
-  series =        {LNCS}, +url =           {http://www.cs.colostate.edu/%7Epouchet/doc/cc-article.10.pdf}, 
-  address =       {Paphos, Cyprus}, +month =         Mar, 
-  pages =         {283--303}, +publisher =     {Springer-Verlag},
-  month =         Mar, +
-  publisher =     {Springer-Verlag},+
  
  
  
 @ARTICAL{123, author={Sanket Tavarageri, Albert Hartono, Muthu Baskaran, Louis-Noel Pouchet,J. Ramanujam @ARTICAL{123, author={Sanket Tavarageri, Albert Hartono, Muthu Baskaran, Louis-Noel Pouchet,J. Ramanujam
-and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, doi = {http://web.cse.ohio-state.edu/~pouchet.2/doc/cpc-article.10.pdf} ,}+and P. Sadayappan}, title={Parametric Tiling of Affine Loop Nests}, year={2010}, url = {http://web.cs.ucla.edu/~pouchet/doc/cpc-article.10.pdf}, doi = {http://web.cse.ohio-state.edu/~pouchet.2/doc/cpc-article.10.pdf} ,}
  
  
Line 258: Line 260:
 year={2011},  year={2011}, 
 pages={35-42},  pages={35-42}, 
-keywords={floating point arithmetic;matrix multiplication;GFLOPS-W;application-specific custom hardware;floating point operations per second;linear algebra core;matrix computations;matrix-matrix multiplication;power consumption reduction;technology scaling;Bandwidth;Computer architecture;Hardware;Kernel;Linear algebra;Program processors;Registers}, +keywords={floating point arithmetic;matrix multiplication;GFLOPS-W;application-specific custom hardware;floating point operations per second;linear algebra core;matrix computations;matrix-matrix multiplication;power consumption reduction;technology scaling;Bandwidth;Computer architecture;Hardware;Kernel;Linear algebra;Program processors;Registers}, 
 +url = {http://ieeexplore.ieee.org/document/6043234/}, 
 doi={10.1109/ASAP.2011.6043234},  doi={10.1109/ASAP.2011.6043234}, 
 ISSN={1063-6862},  ISSN={1063-6862}, 
Line 266: Line 269:
  
 @inproceedings{Bandishti:2012:TSC:2388996.2389051, @inproceedings{Bandishti:2012:TSC:2388996.2389051,
- author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday}, +author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday}, 
- title = {Tiling Stencil Computations to Maximize Parallelism}, +title = {Tiling Stencil Computations to Maximize Parallelism}, 
- booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis}, +booktitle = {Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis}, 
- series = {SC '12}, +series = {SC '12}, 
- year = {2012}, +year = {2012}, 
- isbn = {978-1-4673-0804-5}, +isbn = {978-1-4673-0804-5}, 
- location = {Salt Lake City, Utah}, +location = {Salt Lake City, Utah}, 
- pages = {40:1--40:11}, +pages = {40:1--40:11}, 
- articleno = {40}, +articleno = {40}, 
- numpages = {11}, +numpages = {11}, 
- url = {http://dl.acm.org/citation.cfm?id=2388996.2389051}, +url = {http://dl.acm.org/citation.cfm?id=2388996.2389051}, 
- acmid = {2389051}, +acmid = {2389051}, 
- publisher = {IEEE Computer Society Press}, +publisher = {IEEE Computer Society Press}, 
- address = {Los Alamitos, CA, USA}, +address = {Los Alamitos, CA, USA}, 
- keywords = {compilers, program transformation},+keywords = {compilers, program transformation},
  
  
Line 318: Line 321:
 pages={300-309},  pages={300-309}, 
 keywords={graphics processing units;learning (artificial intelligence);parallel processing;storage management;Nvidia GTX Titan GPU;OpenCL stencil kernel;automatic performance tuning;graphics processing unit;machine learning;optimization;random sampling;stencil computation;Graphics processing units;Instruction sets;Kernel;Merging;Optimization;Parallel processing;Yttrium;GPGPU;auto-tuning;machine learning;stencil},  keywords={graphics processing units;learning (artificial intelligence);parallel processing;storage management;Nvidia GTX Titan GPU;OpenCL stencil kernel;automatic performance tuning;graphics processing unit;machine learning;optimization;random sampling;stencil computation;Graphics processing units;Instruction sets;Kernel;Merging;Optimization;Parallel processing;Yttrium;GPGPU;auto-tuning;machine learning;stencil}, 
-doi={10.1109/ICPP.2015.39}, +doi={10.1109/ICPP.2015.39}, 
 +url = {http://ieeexplore.ieee.org/document/7349585/}, 
 ISSN={0190-3918},  ISSN={0190-3918}, 
 month={Sept},} month={Sept},}
  
 @article{DBLPSteve, @article{DBLPSteve,
-  author    = {Sharan Chetlur and + author    = {Sharan Chetlur and Cliff Woolley and Philippe Vandermersch and Jonathan Cohen and          John Tran and Bryan Catanzaro and Evan Shelhamer}, 
-               Cliff Woolley and +title     = {cuDNN: Efficient Primitives for Deep Learning}, 
-               Philippe Vandermersch and +journal   = {CoRR}, 
-               Jonathan Cohen and +volume    = {abs/1410.0759}, 
-               John Tran and +year      = {2014}, 
-               Bryan Catanzaro and +url       = {http://arxiv.org/abs/1410.0759}, 
-               Evan Shelhamer}, +timestamp = {Sun, 02 Nov 2014 11:25:59 +0100}, 
-  title     = {cuDNN: Efficient Primitives for Deep Learning}, +biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/ChetlurWVCTCS14}, 
-  journal   = {CoRR}, +bibsource = {dblp computer science bibliography, http://dblp.org}
-  volume    = {abs/1410.0759}, +
-  year      = {2014}, +
-  url       = {http://arxiv.org/abs/1410.0759}, +
-  timestamp = {Sun, 02 Nov 2014 11:25:59 +0100}, +
-  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/ChetlurWVCTCS14}, +
-  bibsource = {dblp computer science bibliography, http://dblp.org}+
 } }
  
 @article{Bao:2016:SDF:3012405.3011017, @article{Bao:2016:SDF:3012405.3011017,
- author = {Bao, Wenlei and Hong, Changwan and Chunduri, Sudheer and Krishnamoorthy, Sriram and Pouchet, Louis-Noel and Rastello, Fabrice and Sadayappan, P.}, +author = {Bao, Wenlei and Hong, Changwan and Chunduri, Sudheer and Krishnamoorthy, Sriram and Pouchet, Louis-Noel and Rastello, Fabrice and Sadayappan, P.}, 
- title = {Static and Dynamic Frequency Scaling on Multicore CPUs}, +title = {Static and Dynamic Frequency Scaling on Multicore CPUs}, 
- journal = {ACM Trans. Archit. Code Optim.}, +journal = {ACM Trans. Archit. Code Optim.}, 
- issue_date = {December 2016}, +issue_date = {December 2016}, 
- volume = {13}, +volume = {13}, 
- number = {4}, +number = {4}, 
- month = dec, +month = dec, 
- year = {2016}, +year = {2016}, 
- issn = {1544-3566}, +issn = {1544-3566}, 
- pages = {51:1--51:26}, +pages = {51:1--51:26}, 
- articleno = {51}, +articleno = {51}, 
- numpages = {26}, +numpages = {26}, 
- url = {http://doi.acm.org/10.1145/3011017}, +url = {http://doi.acm.org/10.1145/3011017}, 
- doi = {10.1145/3011017}, +doi = {10.1145/3011017}, 
- acmid = {3011017}, +acmid = {3011017}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA}, +address = {New York, NY, USA}, 
- keywords = {Affine Programs, CPU Energy, Static Analysis, Voltage and Frequency Scaling},+keywords = {Affine Programs, CPU Energy, Static Analysis, Voltage and Frequency Scaling},
  
  
 @inproceedings{Pouchet:2013:PDR:2435264.2435273, @inproceedings{Pouchet:2013:PDR:2435264.2435273,
- author = {Pouchet, Louis-Noel and Zhang, Peng and Sadayappan, P. and Cong, Jason}, +author = {Pouchet, Louis-Noel and Zhang, Peng and Sadayappan, P. and Cong, Jason}, 
- title = {Polyhedral-based Data Reuse Optimization for Configurable Computing}, +title = {Polyhedral-based Data Reuse Optimization for Configurable Computing}, 
- booktitle = {Proceedings of the ACM/SIGDA International Symposium on Field Programmable Gate Arrays}, +booktitle = {Proceedings of the ACM/SIGDA International Symposium on Field Programmable Gate Arrays}, 
- series = {FPGA '13}, +series = {FPGA '13}, 
- year = {2013}, +year = {2013}, 
- isbn = {978-1-4503-1887-7}, +isbn = {978-1-4503-1887-7}, 
- location = {Monterey, California, USA}, +location = {Monterey, California, USA}, 
- pages = {29--38}, +pages = {29--38}, 
- numpages = {10}, +numpages = {10}, 
- url = {http://doi.acm.org/10.1145/2435264.2435273}, +url = {http://doi.acm.org/10.1145/2435264.2435273}, 
- doi = {10.1145/2435264.2435273}, +doi = {10.1145/2435264.2435273}, 
- acmid = {2435273}, +acmid = {2435273}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA}, +address = {New York, NY, USA}, 
- keywords = {compilation, data reuse, high-level synthesis, program transformations},+keywords = {compilation, data reuse, high-level synthesis, program transformations},
  
  
 @article{Kong:2013:PTM:2499370.2462187, @article{Kong:2013:PTM:2499370.2462187,
- author = {Kong, Martin and Veras, Richard and Stock, Kevin and Franchetti, Franz and Pouchet, Louis-No\"{e}l and Sadayappan, P.}, +author = {Kong, Martin and Veras, Richard and Stock, Kevin and Franchetti, Franz and Pouchet, Louis-No\"{e}l and Sadayappan, P.}, 
- title = {When Polyhedral Transformations Meet SIMD Code Generation}, +title = {When Polyhedral Transformations Meet SIMD Code Generation}, 
- journal = {SIGPLAN Not.}, +journal = {SIGPLAN Not.}, 
- issue_date = {June 2013}, +issue_date = {June 2013}, 
- volume = {48}, +volume = {48}, 
- number = {6}, +number = {6}, 
- month = jun, +month = jun, 
- year = {2013}, +year = {2013}, 
- issn = {0362-1340}, +issn = {0362-1340}, 
- pages = {127--138}, +pages = {127--138}, 
- numpages = {12}, +numpages = {12}, 
- url = {http://doi.acm.org/10.1145/2499370.2462187}, +url = {http://doi.acm.org/10.1145/2499370.2462187}, 
- doi = {10.1145/2499370.2462187}, +doi = {10.1145/2499370.2462187}, 
- acmid = {2462187}, +acmid = {2462187}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA}, +address = {New York, NY, USA}, 
- keywords = {affine scheduling, autotuning, compiler optimization, loop transformations, program synthesis},+keywords = {affine scheduling, autotuning, compiler optimization, loop transformations, program synthesis},
  
  
 @article{cummins2017synthesizing, @article{cummins2017synthesizing,
- +title={Synthesizing benchmarks for predictive modeling}, 
-   title={Synthesizing benchmarks for predictive modeling}, +author={Cummins, Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh}, 
- +year={2017}, 
-   author={Cummins, Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh}, +url={http://homepages.inf.ed.ac.uk/hleather/publications/2017-benchsynth-cgo.pdf}
- +
-   year={2017}, +
-    +
-   url={http://homepages.inf.ed.ac.uk/hleather/publications/2017-benchsynth-cgo.pdf} +
 } }
  
 @article{optimistic2017, @article{optimistic2017,
- +title={Optimistic Loop Optimization}, 
-   title={Optimistic Loop Optimization}, +author={Doerfert, Johannes and Grosser, Tobias and Hack, Sebastian}, 
- +url = {http://dl.acm.org/citation.cfm?id=3049832.3049864}, 
-   author={Doerfert, Johannes and Grosser, Tobias and Hack, Sebastian}, +year={2017}
-   url = {http://dl.acm.org/citation.cfm?id=3049832.3049864}, +
-   year={2017} +
 } }
  
-@inbook{e0cc7363fd684a529d1ba82b8195d530,+@inproceedings{Ogilvie:2017:MCI:3049832.3049859, 
-  title     = "Minimizing the cost of iterative compilation with active learning",+author = {Ogilvie, William F. and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},
-  keywords  = "Active Learning, Compilers, Iterative Compilation, Machine Learning, Sequential Analysis;",+title = {Minimizing the Cost of Iterative Compilation with Active Learning},
-  author    = "William Ogilvie and Pavlos Petoumenos and Zheng Wang and Hugh Leather",+booktitle = {Proceedings of the 2017 International Symposium on Code Generation and Optimization}, 
-  note      = "Date of Acceptance: 25/10/2016",+series = {CGO '17}, 
-  year      = "2016",+year = {2017}, 
-  month     = "10",+isbn = {978-1-5090-4931-8}, 
-  booktitle = "The International Symposium on Code Generation and Optimization (CGO) 2017",+location = {Austin, USA},
 +pages = {245--256},
 +numpages = {12}, 
 +url = {http://dl.acm.org/citation.cfm?id=3049832.3049859},
 +acmid = {3049859},
 +publisher = {IEEE Press},
 +address = {Piscataway, NJ, USA}, 
 +keywords = {Active Learning, Compilers, Iterative Compilation, Machine Learning, Sequential Analysis},
 } }
  
Line 434: Line 431:
  
 @inproceedings{Putnam:2014:RFA:2665671.2665678, @inproceedings{Putnam:2014:RFA:2665671.2665678,
- author = {Putnam, Andrew and Caulfield, Adrian M. and Chung, Eric S. and Chiou, Derek and Constantinides, Kypros and Demme, John and Esmaeilzadeh, Hadi and Fowers, Jeremy and Gopal, Gopi Prashanth and Gray, Jan and Haselman, Michael and Hauck, Scott and Heil, Stephen and Hormati, Amir and Kim, Joo-Young and Lanka, Sitaram and Larus, James and Peterson, Eric and Pope, Simon and Smith, Aaron and Thong, Jason and Xiao, Phillip Yi and Burger, Doug}, +author = {Putnam, Andrew and Caulfield, Adrian M. and Chung, Eric S. and Chiou, Derek and Constantinides, Kypros and Demme, John and Esmaeilzadeh, Hadi and Fowers, Jeremy and Gopal, Gopi Prashanth and Gray, Jan and Haselman, Michael and Hauck, Scott and Heil, Stephen and Hormati, Amir and Kim, Joo-Young and Lanka, Sitaram and Larus, James and Peterson, Eric and Pope, Simon and Smith, Aaron and Thong, Jason and Xiao, Phillip Yi and Burger, Doug}, 
- title = {A Reconfigurable Fabric for Accelerating Large-scale Datacenter Services}, +title = {A Reconfigurable Fabric for Accelerating Large-scale Datacenter Services}, 
- booktitle = {Proceeding of the 41st Annual International Symposium on Computer Architecuture}, +booktitle = {Proceeding of the 41st Annual International Symposium on Computer Architecuture}, 
- series = {ISCA '14}, +series = {ISCA '14}, 
- year = {2014}, +year = {2014}, 
- isbn = {978-1-4799-4394-4}, +isbn = {978-1-4799-4394-4}, 
- location = {Minneapolis, Minnesota, USA}, +location = {Minneapolis, Minnesota, USA}, 
- pages = {13--24}, +pages = {13--24}, 
- numpages = {12}, +numpages = {12}, 
- url = {http://dl.acm.org/citation.cfm?id=2665671.2665678}, +url = {http://dl.acm.org/citation.cfm?id=2665671.2665678}, 
- acmid = {2665678}, +acmid = {2665678}, 
- publisher = {IEEE Press}, +publisher = {IEEE Press}, 
- address = {Piscataway, NJ, USA},+address = {Piscataway, NJ, USA},
  
  
-[download] 
  
 @miscellaneous{accelerating-deep-convolutional-neural-networks-using-specialized-hardware, @miscellaneous{accelerating-deep-convolutional-neural-networks-using-specialized-hardware,
Line 457: Line 453:
 year = {2015}, year = {2015},
 month = {February}, month = {February},
-abstract = { +url = {https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/CNN20Whitepaper.pdf},
- +
-We describe the design of a convolutional neural network accelerator running on a Stratix V FPGA. The design runs at three times the throughput of previous FPGA CNN accelerator designs. We show that the throughput/watt is significantly higher than for a GPU, and project the performance when ported to an Arria 10 FPGA. +
- +
- +
-},+
 publisher = {Microsoft Research}, publisher = {Microsoft Research},
 url = {https://www.microsoft.com/en-us/research/publication/accelerating-deep-convolutional-neural-networks-using-specialized-hardware/}, url = {https://www.microsoft.com/en-us/research/publication/accelerating-deep-convolutional-neural-networks-using-specialized-hardware/},
Line 474: Line 465:
  
 @inproceedings{Deitz:2001:ERS:377792.377807, @inproceedings{Deitz:2001:ERS:377792.377807,
- author = {Deitz, Steven J. and Chamberlain, Bradford L. and Snyder, Lawrence}, +author = {Deitz, Steven J. and Chamberlain, Bradford L. and Snyder, Lawrence}, 
- title = {Eliminating Redundancies in Sum-of-product Array Computations}, +title = {Eliminating Redundancies in Sum-of-product Array Computations}, 
- booktitle = {Proceedings of the 15th International Conference on Supercomputing}, +booktitle = {Proceedings of the 15th International Conference on Supercomputing}, 
- series = {ICS '01}, +series = {ICS '01}, 
- year = {2001}, +year = {2001}, 
- isbn = {1-58113-410-X}, +isbn = {1-58113-410-X}, 
- location = {Sorrento, Italy}, +location = {Sorrento, Italy}, 
- pages = {65--77}, +pages = {65--77}, 
- numpages = {13}, +numpages = {13}, 
- url = {http://doi.acm.org/10.1145/377792.377807}, +url = {http://doi.acm.org/10.1145/377792.377807}, 
- doi = {10.1145/377792.377807}, +doi = {10.1145/377792.377807}, 
- acmid = {377807}, +acmid = {377807}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA},+address = {New York, NY, USA},
 } }
  
 @inproceedings{Basu:2015:CTH:2863692.2863932, @inproceedings{Basu:2015:CTH:2863692.2863932,
- author = {Basu, Protonu and Hall, Mary and Williams, Samuel and Straalen, Brian Van and Oliker, Leonid and Colella, Phillip}, +author = {Basu, Protonu and Hall, Mary and Williams, Samuel and Straalen, Brian Van and Oliker, Leonid and Colella, Phillip}, 
- title = {Compiler-Directed Transformation for Higher-Order Stencils}, +title = {Compiler-Directed Transformation for Higher-Order Stencils}, 
- booktitle = {Proceedings of the 2015 IEEE International Parallel and Distributed Processing Symposium}, +booktitle = {Proceedings of the 2015 IEEE International Parallel and Distributed Processing Symposium}, 
- series = {IPDPS '15}, +series = {IPDPS '15}, 
- year = {2015}, +year = {2015}, 
- isbn = {978-1-4799-8649-1}, +isbn = {978-1-4799-8649-1}, 
- pages = {313--323}, +pages = {313--323}, 
- numpages = {11}, +numpages = {11}, 
- url = {http://dx.doi.org/10.1109/IPDPS.2015.103}, +url = {http://dx.doi.org/10.1109/IPDPS.2015.103}, 
- doi = {10.1109/IPDPS.2015.103}, +doi = {10.1109/IPDPS.2015.103}, 
- acmid = {2863932}, +acmid = {2863932}, 
- publisher = {IEEE Computer Society}, +publisher = {IEEE Computer Society}, 
- address = {Washington, DC, USA}, +address = {Washington, DC, USA}, 
- keywords = {Compiler Optimization, Stencil, High-Order, Multigrid, Mehrstellen},+keywords = {Compiler Optimization, Stencil, High-Order, Multigrid, Mehrstellen},
 } }
  
 @inproceedings{Putnam:2008:CHC:1344671.1344720, @inproceedings{Putnam:2008:CHC:1344671.1344720,
- author = {Putnam, Andrew R. and Bennett, Dave and Dellinger, Eric and Mason, Jeff and Sundararajan, Prasanna}, +author = {Putnam, Andrew R. and Bennett, Dave and Dellinger, Eric and Mason, Jeff and Sundararajan, Prasanna}, 
- title = {CHiMPS: A High-level Compilation Flow for Hybrid CPU-FPGA Architectures}, +title = {CHiMPS: A High-level Compilation Flow for Hybrid CPU-FPGA Architectures}, 
- booktitle = {Proceedings of the 16th International ACM/SIGDA Symposium on Field Programmable Gate Arrays}, +booktitle = {Proceedings of the 16th International ACM/SIGDA Symposium on Field Programmable Gate Arrays}, 
- series = {FPGA '08}, +series = {FPGA '08}, 
- year = {2008}, +year = {2008}, 
- isbn = {978-1-59593-934-0}, +isbn = {978-1-59593-934-0}, 
- location = {Monterey, California, USA}, +location = {Monterey, California, USA}, 
- pages = {261--261}, +pages = {261--261}, 
- numpages = {1}, +numpages = {1}, 
- url = {http://doi.acm.org/10.1145/1344671.1344720}, +url = {http://doi.acm.org/10.1145/1344671.1344720}, 
- doi = {10.1145/1344671.1344720}, +doi = {10.1145/1344671.1344720}, 
- acmid = {1344720}, +acmid = {1344720}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA}, +address = {New York, NY, USA}, 
- keywords = {FPGA, FPGA accelerators, c-to-gates, high-performance computing, reconfigurable computing},+keywords = {FPGA, FPGA accelerators, c-to-gates, high-performance computing, reconfigurable computing},
  
 @inproceedings{Wong:2011:CFV:1950413.1950419, @inproceedings{Wong:2011:CFV:1950413.1950419,
- author = {Wong, Henry and Betz, Vaughn and Rose, Jonathan}, +author = {Wong, Henry and Betz, Vaughn and Rose, Jonathan}, 
- title = {Comparing FPGA vs. Custom Cmos and the Impact on Processor Microarchitecture}, +title = {Comparing FPGA vs. Custom Cmos and the Impact on Processor Microarchitecture}, 
- booktitle = {Proceedings of the 19th ACM/SIGDA International Symposium on Field Programmable Gate Arrays}, +booktitle = {Proceedings of the 19th ACM/SIGDA International Symposium on Field Programmable Gate Arrays}, 
- series = {FPGA '11}, +series = {FPGA '11}, 
- year = {2011}, +year = {2011}, 
- isbn = {978-1-4503-0554-9}, +isbn = {978-1-4503-0554-9}, 
- location = {Monterey, CA, USA}, +location = {Monterey, CA, USA}, 
- pages = {5--14}, +pages = {5--14}, 
- numpages = {10}, +numpages = {10}, 
- url = {http://doi.acm.org/10.1145/1950413.1950419}, +url = {http://doi.acm.org/10.1145/1950413.1950419}, 
- doi = {10.1145/1950413.1950419}, +doi = {10.1145/1950413.1950419}, 
- acmid = {1950419}, +acmid = {1950419}, 
- publisher = {ACM}, +publisher = {ACM}, 
- address = {New York, NY, USA}, +address = {New York, NY, USA}, 
- keywords = {area, cmos, delay, fpga, soft processor},+keywords = {area, cmos, delay, fpga, soft processor},
  
  
 @article{DBLP:journals/corr/GruslysMDLG16, @article{DBLP:journals/corr/GruslysMDLG16,
-  author    = {Audrunas Gruslys and +author    = {Audrunas Gruslys and R{\'{e}}mi Munos and Ivo Danihelka and Marc Lanctot and Alex Graves}, 
-               R{\'{e}}mi Munos and +title     = {Memory-Efficient Backpropagation Through Time}, 
-               Ivo Danihelka and +journal   = {CoRR}, 
-               Marc Lanctot and +volume    = {abs/1606.03401}, 
-               Alex Graves}, +year      = {2016}, 
-  title     = {Memory-Efficient Backpropagation Through Time}, +url       = {http://arxiv.org/abs/1606.03401}, 
-  journal   = {CoRR}, +timestamp = {Fri, 01 Jul 2016 17:39:49 +0200}, 
-  volume    = {abs/1606.03401}, +biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/GruslysMDLG16}, 
-  year      = {2016}, +bibsource = {dblp computer science bibliography, http://dblp.org}
-  url       = {http://arxiv.org/abs/1606.03401}, +
-  timestamp = {Fri, 01 Jul 2016 17:39:49 +0200}, +
-  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/GruslysMDLG16}, +
-  bibsource = {dblp computer science bibliography, http://dblp.org}+
 } }
  
 @inproceedings{FlowMap1994, @inproceedings{FlowMap1994,
- author = {J. Cong and Ding, Yuzheng}, +author = {J. Cong and Ding, Yuzheng}, 
- title = {FlowMap: an optimal technology mapping algorithm for delay optimization in lookup-table based FPGA designs}, +title = {FlowMap: an optimal technology mapping algorithm for delay optimization in lookup-table based FPGA designs}, 
- booktitle = { IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems}, +booktitle = { IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems}, 
- year = {1994}, +year = {1994}, 
- isbn = {1937-4151}, +isbn = {1937-4151}, 
- pages = {1-12}, +pages = {1-12}, 
- url = {http://ieeexplore.ieee.org/document/273754/}, +url = {http://ieeexplore.ieee.org/document/273754/}, 
- doi = {10.1109/43.273754}, +doi = {10.1109/43.273754}, 
- publisher = { IEEE}+publisher = { IEEE}
  
  
melange/papers/spring2018.1519236220.txt.gz · Last modified: 2018/02/21 11:03 by prerana