User Tools

Site Tools


melange:papers:spring2018

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision Both sides next revision
melange:papers:spring2018 [2018/02/21 11:03]
prerana
melange:papers:spring2018 [2018/02/21 17:17]
sanjay
Line 1: Line 1:
 @article{Bielecki:​2016:​TAN:​3060371.3060383,​ @article{Bielecki:​2016:​TAN:​3060371.3060383,​
- ​author = {Bielecki, ​W\lodzimierz ​and Pa\lkowski, Marek},+ ​author = {Bielecki, ​Wlodzimierz ​and Pa\lkowski, Marek},
  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},​  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},​
  ​journal = {Int. J. Appl. Math. Comput. Sci.},  ​journal = {Int. J. Appl. Math. Comput. Sci.},
Line 48: Line 48:
 number="​6",​ number="​6",​
 pages="​607--631",​ pages="​607--631",​
-abstract="​There are many algorithms for the space-time mapping of nested loops. Some of them even make the optimal choices within their framework. We propose a preprocessing phase for algorithms in the polytope model, which extends the model and yields space-time mappings whose schedule is, in some cases, orders of magnitude faster. These are cases in which the dependence graph has small irregularities. The basic idea is to split the index set of the loop nests into parts with a regular dependence structure and apply the existing space-time mapping algorithms to these parts individually. This work is based on a seminal idea in the more limited context of loop parallelization at the code level. We elevate the idea to the model level (our model is the polytope model), which increases its applicability by providing a clearer and wider range of choices at an acceptable analysis cost. Index set splitting is one facet in the effort to extend the power of the polytope model and to enable the generation of competitive target code.",​ 
-issn="​1573-7640",​ 
 doi="​10.1023/​A:​1007516818651",​ doi="​10.1023/​A:​1007516818651",​
 url="​https://​doi.org/​10.1023/​A:​1007516818651"​ url="​https://​doi.org/​10.1023/​A:​1007516818651"​
Line 79: Line 77:
 number={1}, ​ number={1}, ​
 pages={127-138}, ​ pages={127-138}, ​
-keywords={DRAM chips;data flow computing;​energy conservation;​feedforward neural nets;​learning (artificial intelligence);​neural net architecture;​power aware computing;​reconfigurable architectures;​AI systems;​AlexNet;​CNN shapes;DRAM accesses;​Eyeriss;​MAC;​RS dataflow reconfiguration;​accelerator chip;​convolutional layers;data movement energy cost;​dataflow processing;​deep convolutional neural networks;​energy efficiency;​energy-efficient reconfigurable accelerator;​multiply and accumulation;​off-chip DRAM;​reconfiguring architecture;​row stationary;​spatial architecture;​Clocks;​Computer architecture;​Hardware;​Neural networks;​Random access memory;​Shape;​Throughput;​Convolutional neural networks (CNNs);​dataflow processing;​deep learning;​energy-efficient accelerators;​spatial architecture}, +url = {http://​ieeexplore.ieee.org/​document/​7738524/​}, 
 doi={10.1109/​JSSC.2016.2616357}, ​ doi={10.1109/​JSSC.2016.2616357}, ​
 ISSN={0018-9200}, ​ ISSN={0018-9200}, ​
 month={Jan},​} month={Jan},​}
  
-@ARTICLE{88484,  +@inproceedings{Baxter:​1989:​RPS:​72935.72967
-author={JH. Saltz and R. Mirchandaney ​and KCrowley},  + ​author = {Baxter, D. and Mirchandaney, ​R. and Saltz, J. H.}, 
-journal={IEEE Transactions on Computers},  + title = {Run-time ​Parallelization ​and Scheduling ​of Loops}, 
-title={Run-time ​parallelization ​and scheduling ​of loops},  + booktitle ​= {Proceedings of the First Annual ACM Symposium on Parallel Algorithms and Architectures}, 
-year={1991},  + series ​= {SPAA '89}, 
-volume={40},  + year = {1989}, 
-number={5},  + isbn = {0-89791-323-X}, 
-pages={603-612},  + location ​= {Santa Fe, New Mexico, USA}, 
-keywords={parallel programming;​scheduling;​Encore Multimax;​automatic parallelization;​automatic scheduling;​compile-time information;​concurrently executable loop iterations;​do loop;​execution time preprocessing;​executors;​inspector procedures;​loop dependency analysis;​loop indexes;run-time methods;​run-time reordering;​source code loop structures;​symbolic transformation rules;​transformed versions;​wavefronts;​Computer science;​Costs;​Failure analysis;​Level set;​NASA;​Parallel processing;​Performance analysis;​Processor scheduling;​Runtime},  + pages = {303--312}, 
-doi={10.1109/12.88484},  + ​numpages = {10}, 
-ISSN={0018-9340},  + url = {http://​doi.acm.org/​10.1145/​72935.72967}, 
-month={May},}+ doi = {10.1145/72935.72967}, 
 + acmid = {72967}, 
 + publisher ​= {ACM}, 
 + ​address = {New York, NY, USA}, 
 +}
  
-BibTeX | EndNote | ACM Ref 
 @article{Feautrier:​2006:​SSS:​1217445.1217447,​ @article{Feautrier:​2006:​SSS:​1217445.1217447,​
  ​author = {Feautrier, Paul},  ​author = {Feautrier, Paul},
Line 157: Line 158:
 } }
  
-@inproceedings{sanjay-fsttcs86,​ +@inproceedings{Rajopadhye:​1986:​SSA:​646824.706926
-        Address = {New Delhi, India}+ author ​= {Rajopadhye, ​Sanjay ​V. and Purushothaman,​ S. and Fujimoto, ​Richard}, 
-        ​Author ​= {Rajopadhye, ​S. V. and Purushothaman,​ S. and Fujimoto, ​R. M.}, + title = {On Synthesizing Systolic Arrays from Recurrence Equations with Linear Dependencies}, 
-        ​Booktitle ​= {Proceedings, Sixth Conference on Foundations of Software + ​booktitle = {Proceedings of the Sixth Conference on Foundations of Software Technology and Theoretical Computer Science}, 
-                  ​Technology and Theoretical Computer Science}, + year = {1986}, 
-        ​Key ​= {Rajopadhye86b}, + isbn = {3-540-17179-7}, 
-        ​Month ​= {December}, + pages = {488--503}, 
-        ​Pages ​= {488-503},​ + numpages ​= {16}, 
-        ​Publisher ​= {Springer VerlagLNCS~241}, + url = {http://​dl.acm.org/​citation.cfm?​id=646824.706926}, 
-        ​Title ​= {On Synthesizing Systolic Arrays from Recurrence Equations + acmid = {706926}, 
-                  with Linear Dependencies}, + publisher = {Springer-Verlag}, 
-        ​Year ​= {1986}}+ address ​= {London, UK, UK}
 +}
  
-@article{quinton-jvsp89+@Article{Quinton1989
-        ​Author ​{Quinton, ​P. and {Van Dongen}V.}+author="Quinton, ​Patrice 
-        ​Journal ​{Journal of {VLSI} Signal Processing}+and van Dongen, ​Vincent"​
-        ​Number ​= 2, +title="The mapping of linear recurrence equations on regular arrays",​ 
-        ​Pages ​{95-113}+journal="​Journal of VLSI signal processing systems for signal, image and video technology",​ 
-        ​Publisher ​{Kluwer Academic Publishers, Boston}+year="​1989",​ 
-        ​Title ​{The Mapping of Linear Recurrence Equations on Regular Arrays}+month="​Oct",​ 
-        ​Volume ​1, +day="​01",​ 
-        Year = 1989}+volume="​1"​
 +number="2"
 +pages="95--113"
 +issn="​0922-5773"​
 +doi="​10.1007/​BF02477176"​
 +url="​https://​doi.org/​10.1007/​BF02477176"​ 
 +}
  
 @Article{feautrier91,​ @Article{feautrier91,​
-author= ​        ​"​Feautrier,​ P.",+author= "​Feautrier,​ P.",
 title= ​         "​Dataflow analysis of array and scalar references",​ title= ​         "​Dataflow analysis of array and scalar references",​
 journal= ​       "​International Journal of Parallel Programming",​ journal= ​       "​International Journal of Parallel Programming",​
Line 189: Line 197:
 pages= ​         "​23-53",​ pages= ​         "​23-53",​
 month= ​         "​Feb",​ month= ​         "​Feb",​
-annote        ​"This article explains how a simple imperative language +url           "http://www.cs.colostate.edu/​~cs560/​Spring2011/​Notes/​FeautrierEDFAijpp91.pdf"
-        program (consisting only of assignments,​ for loops with affine loop +
-        limits, and arrays with affine index expressions),​ can be statically +
-        analyzed to find the flow dependencies."+
 } }
  
 @article{feautrier92a,​ @article{feautrier92a,​
-  ​author ​   = {Paul Feautrier},​ +author ​   = {Paul Feautrier},​ 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part I}.  One-dimensional Time}, 
-              ​{Part I}.  One-dimensional Time}, +journal ​  = {International Journal of Parallel Programming},​ 
-  journal ​  = {International Journal of Parallel Programming},​ +volume ​   = {21}, 
-  volume ​   = {21}, +number ​   = {5}, 
-  number ​   = {5}, +year      = {1992}, 
-  year      = {1992}, +pages     = {313-347},​ 
-  pages     = {313-347},​ +url        = {http://​dx.doi.org/​10.1007/​BF01407835}
-  ​ee ​       = {http://​dx.doi.org/​10.1007/​BF01407835}+
 } }
  
 @article{feautrier92b,​ @article{feautrier92b,​
-  ​author ​   = {Paul Feautrier},​ +author ​   = {Paul Feautrier},​ 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part II}. Multidimensional Time}, 
-               {Part II}. Multidimensional Time}, +journal ​  = {International Journal of Parallel Programming},​ 
-  journal ​  = {International Journal of Parallel Programming},​ +volume ​   = {21}, 
-  volume ​   = {21}, +number ​   = {6}, 
-  number ​   = {6}, +year      = {1992}, 
-  year      = {1992}, +pages     = {389-420},​ 
-  pages     = {389-420},​ +url        = {http://​dx.doi.org/​10.1007/​BF01379404}
-  ​ee ​       = {http://​dx.doi.org/​10.1007/​BF01379404}+
 } }
  
Line 227: Line 230:
   number =       2,   number =       2,
   pages =        {210-226},   pages =        {210-226},
 +  url =          {http://​doi.acm.org/​10.1145/​209937.209947},​
   month =        {Feb}}   month =        {Feb}}
  
Line 241: Line 245:
   address =       ​{Paphos,​ Cyprus},   address =       ​{Paphos,​ Cyprus},
   pages =         ​{283--303},​   pages =         ​{283--303},​
 +  url =           ​{http://​dx.doi.org/​10.1007/​978-3-642-11970-5_16},​
   month =         Mar,   month =         Mar,
   publisher =     ​{Springer-Verlag},​   publisher =     ​{Springer-Verlag},​
Line 247: Line 252:
  
 @ARTICAL{123,​ author={Sanket Tavarageri, Albert Hartono, Muthu Baskaran, Louis-Noel Pouchet,J. Ramanujam @ARTICAL{123,​ author={Sanket Tavarageri, Albert Hartono, Muthu Baskaran, Louis-Noel Pouchet,J. Ramanujam
-and P. Sadayappan},​ title={Parametric Tiling of Affine Loop Nests}, year={2010},​ doi = {http://​web.cse.ohio-state.edu/​~pouchet.2/​doc/​cpc-article.10.pdf} ,}+and P. Sadayappan},​ title={Parametric Tiling of Affine Loop Nests}, year={2010}, url = {http://​web.cs.ucla.edu/​~pouchet/​doc/​cpc-article.10.pdf}, doi = {http://​web.cse.ohio-state.edu/​~pouchet.2/​doc/​cpc-article.10.pdf} ,}
  
  
Line 258: Line 263:
 year={2011}, ​ year={2011}, ​
 pages={35-42}, ​ pages={35-42}, ​
-keywords={floating point arithmetic;​matrix multiplication;​GFLOPS-W;​application-specific custom hardware;​floating point operations per second;​linear algebra core;matrix computations;​matrix-matrix multiplication;​power consumption reduction;​technology scaling;​Bandwidth;​Computer architecture;​Hardware;​Kernel;​Linear algebra;​Program processors;​Registers}, ​+keywords={floating point arithmetic;​matrix multiplication;​GFLOPS-W;​application-specific custom hardware;​floating point operations per second;​linear algebra core;matrix computations;​matrix-matrix multiplication;​power consumption reduction;​technology scaling;​Bandwidth;​Computer architecture;​Hardware;​Kernel;​Linear algebra;​Program processors;​Registers}, 
 +url = {http://​ieeexplore.ieee.org/​document/​6043234/​}, 
 doi={10.1109/​ASAP.2011.6043234}, ​ doi={10.1109/​ASAP.2011.6043234}, ​
 ISSN={1063-6862}, ​ ISSN={1063-6862}, ​
Line 318: Line 324:
 pages={300-309}, ​ pages={300-309}, ​
 keywords={graphics processing units;​learning (artificial intelligence);​parallel processing;​storage management;​Nvidia GTX Titan GPU;OpenCL stencil kernel;​automatic performance tuning;​graphics processing unit;​machine learning;​optimization;​random sampling;​stencil computation;​Graphics processing units;​Instruction sets;​Kernel;​Merging;​Optimization;​Parallel processing;​Yttrium;​GPGPU;​auto-tuning;​machine learning;​stencil}, ​ keywords={graphics processing units;​learning (artificial intelligence);​parallel processing;​storage management;​Nvidia GTX Titan GPU;OpenCL stencil kernel;​automatic performance tuning;​graphics processing unit;​machine learning;​optimization;​random sampling;​stencil computation;​Graphics processing units;​Instruction sets;​Kernel;​Merging;​Optimization;​Parallel processing;​Yttrium;​GPGPU;​auto-tuning;​machine learning;​stencil}, ​
-doi={10.1109/​ICPP.2015.39}, ​+doi={10.1109/​ICPP.2015.39}, 
 +url = {http://​ieeexplore.ieee.org/​document/​7349585/​}, 
 ISSN={0190-3918}, ​ ISSN={0190-3918}, ​
 month={Sept},​} month={Sept},​}
Line 400: Line 407:
  
 @article{cummins2017synthesizing,​ @article{cummins2017synthesizing,​
- 
    ​title={Synthesizing benchmarks for predictive modeling},    ​title={Synthesizing benchmarks for predictive modeling},
- 
    ​author={Cummins,​ Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},    ​author={Cummins,​ Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},
- 
    ​year={2017},​    ​year={2017},​
-    
    ​url={http://​homepages.inf.ed.ac.uk/​hleather/​publications/​2017-benchsynth-cgo.pdf}    ​url={http://​homepages.inf.ed.ac.uk/​hleather/​publications/​2017-benchsynth-cgo.pdf}
- 
 } }
  
 @article{optimistic2017,​ @article{optimistic2017,​
- 
    ​title={Optimistic Loop Optimization},​    ​title={Optimistic Loop Optimization},​
- 
    ​author={Doerfert,​ Johannes and Grosser, Tobias and Hack, Sebastian},    ​author={Doerfert,​ Johannes and Grosser, Tobias and Hack, Sebastian},
    url = {http://​dl.acm.org/​citation.cfm?​id=3049832.3049864},​    url = {http://​dl.acm.org/​citation.cfm?​id=3049832.3049864},​
    ​year={2017}    ​year={2017}
- 
 } }
  
-@inbook{e0cc7363fd684a529d1ba82b8195d530+@inproceedings{Ogilvie:​2017:​MCI:​3049832.3049859,​ 
-  title     ​"Minimizing the cost of iterative compilation ​with active learning"​+ ​author = {Ogilvie, William F. and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh}
-  ​keywords  ​"​Active LearningCompilersIterative CompilationMachine LearningSequential Analysis;"​+ title = {Minimizing the Cost of Iterative Compilation ​with Active Learning}
-  ​author ​   ​"​William Ogilvie and Pavlos Petoumenos and Zheng Wang and Hugh Leather"​+ booktitle ​{Proceedings of the 2017 International Symposium on Code Generation and Optimization}, 
-  ​note ​     ​"Date of Acceptance25/10/2016"+ ​series = {CGO '17}, 
-  ​year ​     ​"​2016"​+ year = {2017}, 
-  ​month ​    "​10"​+ isbn = {978-1-5090-4931-8}, 
-  ​booktitle ​"The International Symposium on Code Generation and Optimization (CGO) 2017",+ ​location = {Austin, USA}
 + pages {245--256}
 + numpages ​{12}, 
 + url = {http://dl.acm.org/​citation.cfm?​id=3049832.3049859}
 + acmid {3049859}
 + publisher ​{IEEE Press}
 + address ​{Piscataway,​ NJ, USA}, 
 + ​keywords = {Active Learning, Compilers, Iterative Compilation,​ Machine Learning, Sequential Analysis},
 } }
  
Line 457: Line 463:
 year = {2015}, year = {2015},
 month = {February}, month = {February},
-abstract ​= { +url = {https://www.microsoft.com/​en-us/​research/​wp-content/​uploads/​2016/​02/CNN20Whitepaper.pdf},
- +
-We describe the design of a convolutional neural network accelerator running on a Stratix V FPGAThe design runs at three times the throughput of previous FPGA CNN accelerator designsWe show that the throughput/watt is significantly higher than for a GPU, and project the performance when ported to an Arria 10 FPGA. +
- +
- +
-},+
 publisher = {Microsoft Research}, publisher = {Microsoft Research},
 url = {https://​www.microsoft.com/​en-us/​research/​publication/​accelerating-deep-convolutional-neural-networks-using-specialized-hardware/​},​ url = {https://​www.microsoft.com/​en-us/​research/​publication/​accelerating-deep-convolutional-neural-networks-using-specialized-hardware/​},​
melange/papers/spring2018.txt · Last modified: 2018/04/18 10:30 by prerana