%% README for probcomp bibliography file
%%
%% Maintainer: Feras A. Saad
%%
%% This .bib file uses the following conventions
%%
%% - The most important convention is consistency. When making an addition, it
%% is very likely that you can find an existing entry which matches the
%% template for the new entry, and requires only minor modifications.
%%
%% - Entries are sorted by year.
%%
%% - Entries are keyed by the last name of first author, followed by the year
%% followed by keyword of paper title (towner2019gen).
%%
%% - Each entry should have its keys aligned at the = sign.
%%
%% - Values of non-numeric fields are enclosed between curly braces. The month
%% field is written in lower case abbreviation without curly braces.
%%
%% - Use a trailing comma for the last key in an entry.
%%
%% - The abstract, if exists, should go as the final key in an entry. Do not
%% place hard line breaks in the abstract.
%%
%% - Publication titles are lower case, except for
%% - the first word in the title;
%% - proper nouns (e.g. {B}ayesian, {M}arkov). Make sure to enclose first
%% letter in curly braces to guarantee it is capitalized.
%% - first word after a colon (Picture: A ...). These should be
%% capitalized in the source, but do not require explicit curly braces.
%%
%% - Author names should
%% - follow the style
%% [A1-LastName], [A1-FirstName] and [A2-LastName], [A2-FirstName] ...
%% - please do NOT use:
%% [A1-FirstName] [A1-LastName] and [A2-FirstName] [A2-LastName] ...
%% - enclose compound last names in braces, i.e. {van de Meent}, Jan-Willem
%%
%% - Names of conference proceedings should be first spelled out in full,
%% followed by the acronym e.g.
%% booktitle = {PLDI 2019: Proceedings of the 40th ACM...}
%%
%% - Workshop presentations, use inproceedings (even though most workshops
%% do not have formal proceedings), and write (co-located with
%% ). See examples below.
%%
%% - BibBase specific keys:
%% - url_paper is mandatory when possible, and must be a direct link to a PDF.
%% - url_link is strongly recommended, and usually has an abstract etc.
%% - url_[foo] can be used for creating a custom link with text [foo],
%% such as url_supplement, url_slides, url_video, etc.
%% - note can be used for [Oral], [In review at ...], and so on, and should
%% always terminate with a period.
@inproceedings{saad2020fldr,
title = {The fast loaded dice roller: A near-optimal exact sampler for discrete probability distributions},
author = {Saad, Feras A. and Freer, Cameron E. and Rinard, Martin C. and Mansinghka, Vikash K.},
booktitle = {AISTATS 2020: Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics},
volume = 108,
fpages = {1640--1649},
series = {Proceedings of Machine Learning Research},
address = {Palermo, Sicily, Italy},
publisher = {PMLR},
year = 2020,
keywords = {random variate generation, sampling, discrete random variables},
abstract = {This paper introduces a new algorithm for the fundamental problem of generating a random integer from a discrete probability distribution using a source of independent and unbiased random coin flips. This algorithm, which we call the Fast Loaded Dice Roller (FLDR), has efficient complexity properties in space and time: the size of the sampler is guaranteed to be linear in the number of bits needed to encode the target distribution and the sampler consumes (in expectation) at most 6.5 bits of entropy more than the information-theoretically minimal rate, independently of the values or size of the target distribution. We present an easy-to-implement, linear-time preprocessing algorithm and a fast implementation of the FLDR using unsigned integer arithmetic. Empirical evaluations establish that the FLDR is 2x--10x faster than multiple baseline algorithms for exact sampling, including the widely-used alias and interval samplers. It also uses up to 10000x less space than the information-theoretically optimal sampler, at the expense of a less than 1.5x runtime overhead.},
note = {(To Appear)},
}
@article{saad2020sampling,
title = {Optimal approximate sampling from discrete probability distributions},
author = {Saad, Feras A. and Freer, Cameron E. and Rinard, Martin C. and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 4,
number = {POPL},
month = jan,
year = 2020,
pages = {36:1--36:31},
articleno = 36,
numpages = 31,
publisher = {ACM},
keywords = {discrete random variables, random variate generation},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3371104},
url_link = {https://doi.org/10.1145/3371104},
abstract = {This paper addresses a fundamental problem in random variate generation: given access to a random source that emits a stream of independent fair bits, what is the most accurate and entropy-efficient algorithm for sampling from a discrete probability distribution $(p_1, \dots, p_n)$, where the probabilities of the output distribution $(\hat{p}_1, \dots, \hat{p}_n)$ of the sampling algorithm must be specified using at most $k$ bits of precision? We present a theoretical framework for formulating this problem and provide new techniques for finding sampling algorithms that are optimal both statistically (in the sense of sampling accuracy) and information-theoretically (in the sense of entropy consumption). We leverage these results to build a system that, for a broad family of measures of statistical accuracy, delivers a sampling algorithm whose expected entropy usage is minimal among those that induce the same distribution (i.e., is ``entropy-optimal'') and whose output distribution $(\hat{p}_1, \dots, \hat{p}_n)$ is a closest approximation to the target distribution $(p_1, \dots, p_n)$ among all entropy-optimal sampling algorithms that operate within the specified $k$-bit precision. This optimal approximate sampler is also a closer approximation than any (possibly entropy-suboptimal) sampler that consumes a bounded amount of entropy with the specified precision, a class which includes floating-point implementations of inversion sampling and related methods found in many software libraries. We evaluate the accuracy, entropy consumption, precision requirements, and wall-clock runtime of our optimal approximate sampling algorithms on a broad set of distributions, demonstrating the ways that they are superior to existing approximate samplers and establishing that they often consume significantly fewer resources than are needed by exact samplers.},
}
@article{lew2020traces,
title = {Trace types and denotational semantics for sound programmable inference in probabilistic languages},
author = {Lew, Alexander K. and Cusumano-Towner, Marco F. and Sherman, Benjamin and Carbin, Michale and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 4,
number = {POPL},
month = jan,
year = 2020,
pages = {19:1--19:32},
articleno = 19,
numpages = 32,
publisher = {ACM},
keywords = {type systems, probabilistic programming, programmable inference},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3371087},
url_link = {https://doi.org/10.1145/3371087},
abstract = {Modern probabilistic programming languages aim to formalize and automate key aspects of probabilistic modeling and inference. Many languages provide constructs for programmable inference that enable developers to improve inference speed and accuracy by tailoring an algorithm for use with a particular model or dataset. Unfortunately, it is easy to use these constructs to write unsound programs that appear to run correctly but produce incorrect results. To address this problem, we present a denotational semantics for programmable inference in higher-order probabilistic programming languages, along with a type system that ensures that well-typed inference programs are sound by construction. A central insight is that the type of a probabilistic expression can track the space of its possible execution traces, not just the type of value that it returns, as these traces are often the objects that inference algorithms manipulate. We use our semantics and type system to establish soundness properties of custom inference programs that use constructs for variational, sequential Monte Carlo, importance sampling, and Markov chain Monte Carlo inference.},
}
@article{saad2019bayesian,
title = {Bayesian synthesis of probabilistic programs for automatic data modeling},
author = {Saad, Feras A. and Cusumano-Towner, Marco F. and Schaechtle, Ulrich and Rinard, Martin C. and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 3,
number = {POPL},
month = jan,
year = 2019,
pages = {37:1--37:32},
articleno = {37},
numpages = {29},
publisher = {ACM},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3290350},
url_link = {https://doi.org/10.1145/3290350},
url_video = {https://www.youtube.com/watch?v=T5fdUmYJsjM},
url_MIT_News = {https://news.mit.edu/2019/nonprogrammers-data-science-0115},
url_ZDNet_News = {https://www.zdnet.com/article/mit-aims-to-help-data-scientists-by-having-ai-do-the-heavy-lifting/},
abstract = {We present new techniques for automatically constructing probabilistic programs for data analysis, interpretation, and prediction. These techniques work with probabilistic domain-specific data modeling languages that capture key properties of a broad class of data generating processes, using Bayesian inference to synthesize probabilistic programs in these modeling languages given observed data. We provide a precise formulation of Bayesian synthesis for automatic data modeling that identifies sufficient conditions for the resulting synthesis procedure to be sound. We also derive a general class of synthesis algorithms for domain-specific languages specified by probabilistic context-free grammars and establish the soundness of our approach for these languages. We apply the techniques to automatically synthesize probabilistic programs for time series data and multivariate tabular data. We show how to analyze the structure of the synthesized programs to compute, for key qualitative properties of interest, the probability that the underlying data generating process exhibits each of these properties. Second, we translate probabilistic programs in the domain-specific language into probabilistic programs in Venture, a general-purpose probabilistic programming system. The translated Venture programs are then executed to obtain predictions of new time series data and new multivariate data records. Experimental results show that our techniques can accurately infer qualitative structure in multiple real-world data sets and outperform standard data analysis methods in forecasting and predicting new data.},
}
@inproceedings{towner2019gen,
title = {Gen: a general-purpose probabilistic programming system with programmable inference},
author = {Cusumano-Towner, Marco F. and Saad, Feras A. and Lew, Alexander K. and Mansinghka, Vikash K.},
year = 2019,
booktitle = {PLDI 2019: Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation},
pages = {221--236},
publisher = {ACM},
address = {Phoenix, Arizona},
url_link = {https://dl.acm.org/citation.cfm?id=3314221.3314642},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3314642},
url_MIT_News = {http://news.mit.edu/2019/ai-programming-gen-0626},
url_Venture_Beat_News = {https://venturebeat.com/2019/06/27/mits-gen-programming-system-allows-users-to-easily-create-computer-vision-statistical-ai-and-robotics-programs},
abstract = {Although probabilistic programming is widely used for some restricted classes of statistical models, existing systems lack the flexibility and efficiency needed for practical use with more challenging models arising in fields like computer vision and robotics. This paper introduces Gen, a general-purpose probabilistic programming system that achieves modeling flexibility and inference efficiency via several novel language constructs: (i) the generative function interface for encapsulating probabilistic models; (ii) interoperable modeling languages that strike different flexibility/efficiency trade-offs; (iii) combinators that exploit common patterns of conditional independence; and (iv) an inference library that empowers users to implement efficient inference algorithms at a high level of abstraction. We show that Gen outperforms state-of-the-art probabilistic programming systems, sometimes by multiple orders of magnitude, on diverse problems including object tracking, estimating 3D body pose from a depth image, and inferring the structure of a time series.},
}
@article{saad2019bayesian,
title = {Bayesian synthesis of probabilistic programs for automatic data modeling},
author = {Saad, Feras A. and Cusumano-Towner, Marco F. and Schaechtle, Ulrich and Rinard, Martin C. and Mansinghka, Vikash K.},
journal = {Proceedings of the ACM on Programming Languages},
volume = 3,
number = {POPL},
month = jan,
year = 2019,
pages = {37:1--37:32},
articleno = 37,
numpages = 29,
publisher = {ACM},
url_paper = {https://dl.acm.org/ft_gateway.cfm?id=3290350},
url_link = {https://doi.org/10.1145/3290350},
url_video = {https://www.youtube.com/watch?v=T5fdUmYJsjM},
url_MIT_News = {https://news.mit.edu/2019/nonprogrammers-data-science-0115},
url_ZDNet_News = {https://www.zdnet.com/article/mit-aims-to-help-data-scientists-by-having-ai-do-the-heavy-lifting/},
abstract = {We present new techniques for automatically constructing probabilistic programs for data analysis, interpretation, and prediction. These techniques work with probabilistic domain-specific data modeling languages that capture key properties of a broad class of data generating processes, using Bayesian inference to synthesize probabilistic programs in these modeling languages given observed data. We provide a precise formulation of Bayesian synthesis for automatic data modeling that identifies sufficient conditions for the resulting synthesis procedure to be sound. We also derive a general class of synthesis algorithms for domain-specific languages specified by probabilistic context-free grammars and establish the soundness of our approach for these languages. We apply the techniques to automatically synthesize probabilistic programs for time series data and multivariate tabular data. We show how to analyze the structure of the synthesized programs to compute, for key qualitative properties of interest, the probability that the underlying data generating process exhibits each of these properties. Second, we translate probabilistic programs in the domain-specific language into probabilistic programs in Venture, a general-purpose probabilistic programming system. The translated Venture programs are then executed to obtain predictions of new time series data and new multivariate data records. Experimental results show that our techniques can accurately infer qualitative structure in multiple real-world data sets and outperform standard data analysis methods in forecasting and predicting new data.},
}
@inproceedings{zinberg2019structured,
title = {Structured differentiable models of {3D} scenes via generative scene graphs},
author = {Zinberg, Ben and Cusumano-Towner, Marco and Mansinghka, Vikash},
booktitle = {Workshop on Perception as Generative Reasoning (co-located with NeurIPS 2019)},
year = 2019,
url_paper = {https://pgr-workshop.github.io/img/PGR019.pdf},
abstract = {Generative approaches to 3D scene perception and physical reasoning are increasingly common. There is a widespread need for scene models that can incorporate planar and point-wise contacts between physical objects, and ensure that objects do not inter-penetrate. Generic priors on 6DOF poses---and bottom up neural networks that de-render images into scenes---typically violate these constraints. This abstract introduces a family of generative models for 3D scenes that respect pairwise physical contact constraints between objects. The technical innovation is to represent scenes in terms of hybrid symbolic-numerical scene graphs over objects, where the edges correspond to parameterized contract relationships (see Figure 1). Given this representation, 3D poses can be generated via a graph traversal. We show how to define prior distributions on scene graphics, and how 3D scene understanding can be phrased as posterior inference over the scene graph. This abstract also shows preliminary evidence that it is possible to infer scene graph parameters from empirical data, "derendering" images into structured 3D scene representations. This is implemented using the Gen probabilistic programming language. Finally, this abstract briefly discusses representation and inference challenges that need to be addressed to scale up the approach to more complex, real-world scenes.}
}
@inproceedings{felip2019real,
title = {Real-time approximate inference for scene understanding with generative models},
author = {Felip, Javier and Ahuja, Nilesh and G{\'o}mez-Guti{\'e}rrez, David and Tickoo, Omesh and Mansinghka, Vikash},
booktitle = {Workshop on Perceptions as Generative Reasoning (co-located with NeurIPS 2019)},
year = 2019,
url_paper = {https://pgr-workshop.github.io/img/PGR019.pdf},
abstract = {Consider scene understanding problems such as predicting where a person is probably reaching, or inferring the pose of 3D objects from depth images, or inferring the probable street crossings of pedestrians. This paper shows how to solve these problems using inference in generative models, by introducing new techniques for real-time inference. The underlying generative models are built from realistic simulation software, wrapped in a Bayesian error model for the gap between simulation outputs and real data. This paper shows that it is possible to perform approximately Bayesian inference in these models, obtaining high-quality approximations to the full posterior over scenes, without using bottom-up neural networks. Instead, this paper introduces two general real-time inference techniques. The first is to train neural surrogates of the simulators. The second is to adaptively discretize the latent variables using a Tree-Pyramid (TP) approach. THis paper also shows that by combining these techniques, it is possible to perform accurate, approximately Bayesian inference in realistic generative models, in real time.}
}
@article{handa2019inference,
title = {Compositional inference metaprogramming with convergence guarantees},
author = {Handa, Shivam and Mansinghka, Vikash and Rinard, Martin},
year = 2019,
journal = {arXiv preprint},
volume = {arXiv:1907.05451},
url_paper = {https://arxiv.org/pdf/1907.05451.pdf},
url_link = {https://arxiv.org/abs/1907.05451},
abstract = {Inference metaprogramming enables effective probabilistic programming by supporting the decomposition of executions of probabilistic programs into subproblems and the deployment of hybrid probabilistic inference algorithms that apply different probabilistic inference algorithms to different subproblems. We introduce the concept of independent subproblem inference (as opposed to entangled subproblem inference in which the subproblem inference algorithm operates over the full program trace) and present a mathematical framework for studying convergence properties of hybrid inference algorithms that apply different Markov-Chain Monte Carlo algorithms to different parts of the inference problem. We then use this formalism to prove asymptotic convergence results for probablistic programs with inference metaprogramming. To the best of our knowledge this is the first asymptotic convergence result for hybrid probabilistic inference algorithms defined by (subproblem-based) inference metaprogramming.},
}
@article{witty2019causal,
author = {Witty, Sam and Lew, Alexander and Jensen, David and Mansinghka, Vikash},
title = {Bayesian causal inference via probabilistic program synthesis},
year = 2019,
journal = {arXiv preprint},
volume = {arXiv:1910.14124},
url_paper = {https://arxiv.org/pdf/1910.14124.pdf},
url_link = {https://arxiv.org/abs/1910.14124},
abstract = {Causal inference can be formalized as Bayesian inference that combines a prior distribution over causal models and likelihoods that account for both observations and interventions. We show that it is possible to implement this approach using a sufficiently expressive probabilistic programming language. Priors are represented using probabilistic programs that generate source code in a domain specific language. Interventions are represented using probabilistic programs that edit this source code to modify the original generative process. This approach makes it straightforward to incorporate data from atomic interventions, as well as shift interventions, variance-scaling interventions, and other interventions that modify causal structure. This approach also enables the use of general-purpose inference machinery for probabilistic programs to infer probable causal structures and parameters from data. This abstract describes a prototype of this approach in the Gen probabilistic programming language.},
}
@article {bolton2019zebra,
title = {Elements of a stochastic {3D} prediction engine in larval zebrafish prey capture},
author = {Bolton, Andrew D. and Haesemeyer, Martin and Jordi, Joshua and Schaechtle, Ulrich and Saad, Feras A. and Mansinghka, Vikash K. and Tenenbaum, Joshua B. and Engert, Florian},
editor = {Berman, Gordon J},
volume = 8,
year = 2019,
month = nov,
pages = {e51975},
journal = {eLife},
fdoi = {10.7554/eLife.51975},
url_link = {https://doi.org/10.7554/eLife.51975},
abstract = {The computational principles underlying predictive capabilities in animals are poorly understood. Here, we wondered whether predictive models mediating prey capture could be reduced to a simple set of sensorimotor rules performed by a primitive organism. For this task, we chose the larval zebrafish, a tractable vertebrate that pursues and captures swimming microbes. Using a novel naturalistic 3D setup, we show that the zebrafish combines position and velocity perception to construct a future positional estimate of its prey, indicating an ability to project trajectories forward in time. Importantly, the stochasticity in the fish's sensorimotor transformations provides a considerable advantage over equivalent noise-free strategies. This surprising result coalesces with recent findings that illustrate the benefits of biological stochasticity to adaptive behavior. In sum, our study reveals that zebrafish are equipped with a recursive prey capture algorithm, built up from simple stochastic rules, that embodies an implicit predictive model of the world.},
publisher = {eLife Sciences Publications, Ltd},
}
@inproceedings{ackerman2019barriers,
author = {Ackerman, Nathanael L. and Avigad, Jeremy and Freer, Cameron E. and Roy, Daniel M. and Rute, Jason M.},
title = {Algorithmic barriers to representing conditional independence},
booktitle = {LICS 2019: Proceedings of the 34th Annual ACM/IEEE Symposium on Logic in Computer Science},
address = {Vancouver, British Columbia},
publisher = {ACM},
year = 2019,
url_paper = {http://math.mit.edu/~freer/papers/AAFRR-algorithmic-barriers.pdf},
url_doi = {10.1109/LICS.2019.8785762},
keywords = {conditional independence, computable probability distributions, graphons, cut distance, invariant measures, random-freeness},
abstract = {We define a representation of conditional independence in terms of products of probability kernels, and ask when such representations are computable. We pursue this question in the context of exchangeable sequences and arrays of random variables, which arise in statistical contexts. Exchangeable sequences are conditionally i.i.d. by de Finetti’s theorem. Known results about the computability of de Finetti’s theorem imply that these conditional independences are computable. The conditional independences underlying exchangeable arrays are characterized by the Aldous–Hoover theorem. In the special case of adjacency matrices of undirected graphs, i.e., symmetric binary arrays, this representation theorem expresses the conditional independences in terms of graphons. We prove that there exist exchangeable random graphs that can be computably sampled but whose corresponding graphons are not computable as functions or even as L1 equivalence classes. We also give results on the approximability of graphons in certain special cases.},
}
@inproceedings{saad2019gof,
author = {Saad, Feras A. and Freer, Cameron E. and Ackerman, Nathanael L. and Mansinghka, Vikash K.},
title = {A family of exact goodness-of-fit tests for high-dimensional discrete distributions},
booktitle = {AISTATS 2019: Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics},
volume = 89,
pages = {1640--1649},
series = {Proceedings of Machine Learning Research},
address = {Naha, Okinawa, Japan},
publisher = {PMLR},
year = 2019,
url_paper = {http://proceedings.mlr.press/v89/saad19a/saad19a.pdf},
url_supplement = {http://proceedings.mlr.press/v89/saad19a/saad19a-supp.pdf},
url_link = {http://proceedings.mlr.press/v89/saad19a.html},
keywords = {hypothesis testing, statistical inference, goodness-of-fit tests},
abstract = {The objective of goodness-of-fit testing is to assess whether a dataset of observations is likely to have been drawn from a candidate probability distribution. This paper presents a rank-based family of goodness-of-fit tests that is specialized to discrete distributions on high-dimensional domains. The test is readily implemented using a simulation-based, linear-time procedure. The testing procedure can be customized by the practitioner using knowledge of the underlying data domain. Unlike most existing test statistics, the proposed test statistic is distribution-free and its exact (non-asymptotic) sampling distribution is known in closed form. We establish consistency of the test against all alternatives by showing that the test statistic is distributed as a discrete uniform if and only if the samples were drawn from the candidate distribution. We illustrate its efficacy for assessing the sample quality of approximate sampling algorithms over combinatorially large spaces with intractable probabilities, including random partitions in Dirichlet process mixture models and random lattices in Ising models.},
}
@article{ackerman2019computability,
author = {Ackerman, Nathanael L. and Freer, Cameron E. and Roy, Daniel M.},
title = {On the computability of conditional probability},
journal = {Journal of the ACM},
volume = 66,
number = 3,
month = jun,
year = 2019,
pages = {23:1--23:40},
articleno = 23,
numpages = 40,
publisher = {ACM},
address = {New York, NY, USA},
url_doi = {http://doi.acm.org/10.1145/3321699},
url_paper = {https://arxiv.org/pdf/1005.3014.pdf},
keywords = {computable probability theory, conditional probability, real computation},
abstract = {As inductive inference and machine learning methods in computer science see continued success, researchers are aiming to describe ever more complex probabilistic models and inference algorithms. It is natural to ask whether there is a universal computational procedure for probabilistic inference. We investigate the computability of conditional probability, a fundamental notion in probability theory and a cornerstone ofBayesian statistics. We show that there are computable joint distributions with noncomputable conditional distributions, ruling out the prospect of general inference algorithms, even inefficient ones. Specifically, we construct a pair of computable random variables in the unit interval such that the conditional distribution of the first variable given the second encodes the halting problem. Nevertheless, probabilistic inference is possible in many common modeling settings, and we prove several results giving broadly applicable conditions under which conditional distributions are computable. In particular, conditional distributions become computable when measurements are corrupted by independent computable noise with a sufficiently smooth bounded density},
}
@techreport{towner2018gen,
title = {Gen: A general-purpose probabilistic programming system with programmable inference},
author = {Cusumano-Towner, Marco F. and Saad, Feras A. and Lew, Alexander and Mansinghka, Vikash K.},
year = 2018,
number = {MIT-CSAIL-TR-2018-020},
institution = {Computer Science and Artificial Intelligence Laboratory, Massachusetts Institute of Technology},
address = {Cambridge, Massachusetts},
month = nov,
url_link = {http://hdl.handle.net/1721.1/119255},
abstract = {Probabilistic modeling and inference are central to many fields. A key challenge for wider adoption of probabilistic programming languages is designing systems that are both flexible and performant. This paper introduces Gen, a new probabilistic programming system with novel language con- structs for modeling and for end-user customization and optimization of inference. Gen makes it practical to write probabilistic programs that solve problems from multiple fields. Gen programs can combine generative models written in Julia, neural networks written in TensorFlow, and custom inference algorithms based on an extensible library of Monte Carlo and numerical optimization techniques. This paper also presents techniques that enable Gen's combination of flexibility and performance: (i) the generative function interface, an abstraction for encapsulating probabilistic and/or differentiable computations; (ii) domain-specific languages with custom compilers that strike different flexibility/performance tradeoffs; (iii) combinators that encode common patterns of conditional independence and repeated compu- tation, enabling speedups from caching; and (iv) a standard inference library that supports custom proposal distributions also written as programs in Gen. This paper shows that Gen outperforms state-of-the-art probabilistic programming systems, sometimes by multiple orders of magnitude, on problems such as nonlinear state-space modeling, structure learning for real-world time series data, robust regression, and 3D body pose estimation from depth images.},
}
@inproceedings{mansinghka2018probabilistic,
title = {Probabilistic programming with programmable inference},
author = {Mansinghka, Vikash K. and Schaechtle, Ulrich and Handa, Shivam and Radul, Alexey and Chen, Yutian and Rinard, Martin},
booktitle = {PLDI 2018: Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
year = 2018,
publisher = {ACM},
pages = {603--616},
url_link = {https://dl.acm.org/citation.cfm?id=3192409},
abstract = {We introduce inference metaprogramming for probabilistic programming languages, including new language constructs, a formalism, and the first demonstration of effectiveness in practice. Instead of relying on rigid black-box inference algorithms hard-coded into the language implementation as in previous probabilistic programming languages, inference metaprogramming enables developers to 1) dynamically decompose inference problems into subproblems, 2) apply inference tactics to subproblems, 3) alternate between incorporating new data and performing inference over existing data, and 4) explore multiple execution traces of the probabilistic program at once. Implemented tactics include gradient-based optimization, Markov chain Monte Carlo, variational inference, and sequental Monte Carlo techniques. Inference metaprogramming enables the concise expression of probabilistic models and inference algorithms across diverse fields, such as computer vision, data science, and robotics, within a single probabilistic programming language.},
}
@inproceedings{towner2018incremental,
title = {Incremental inference for probabilistic programs},
author = {Cusumano-Towner, Marco F. and Bichsel, Benjamin and Gehr, Timon and Vechev, Martin and Mansinghka, Vikash K.},
year = 2018,
booktitle = {PLDI 2018: Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
pages = {571--585},
publisher = {ACM},
url_link = {https://dl.acm.org/citation.cfm?id=3192399},
abstract = {We present a novel approach for approximate sampling in probabilistic programs based on incremental inference. The key idea is to adapt the samples for a program $P$ into samples for a program $Q$, thereby avoiding the expensive sampling computation for program $Q$. To enable incremental inference in probabilistic programming, our work: (i) introduces the concept of a trace translator which adapts samples from $P$ into samples of $Q$, (ii) phrases this translation approach in the context of sequential Monte Carlo (SMC), which gives theoretical guarantees that the adapted samples converge to the distribution induced by $Q$, and (iii) shows how to obtain a concrete trace translator by establishing a correspondence between the random choices of the two probabilistic programs. We implemented our approach in two different probabilistic programming systems and showed that, compared to methods that sample the program $Q$ from scratch, incremental inference can lead to orders of magnitude increase in efficiency, depending on how closely related $P$ and $Q$ are.},
}
@inproceedings{towner2018design,
title = {A design proposal for Gen: Probabilistic programming with fast custom inference via code generation},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {MAPL 2018: Workshop on Machine Learning and Programming Languages (co-located with PLDI 2018)},
year = 2018,
pages = {52--57},
url_link = {https://dl.acm.org/citation.cfm?id=3211350},
abstract = {Probabilistic programming languages have the potential to make probabilistic modeling and inference easier to use in practice, but only if inference is sufficiently fast and accurate for real applications. Thus far, this has only been possible for domain-specific languages that focus on a restricted class of models and inference algorithms. This paper proposes a design for a probabilistic programming language called Gen, embedded in Julia, that aims to be sufficiently expressive and performant for general-purpose use. The language provides constructs for automatically generating optimized implementations of custom inference tactics based on static analysis of the target probabilistic model. This paper informally describes a language design for Gen, and shows that Gen is more expressive than Stan, a widely used language for hierarchical Bayesian modeling. A first benchmark shows that a prototype implementation of Gen can be as fast as Stan, only ∼1.4x slower than a hand-coded sampler in Julia, and ∼7,500x faster than Venture, one of the only other probabilistic languages with support for custom inference.}
}
@inproceedings{saad2018trcrpm,
author = {Saad, Feras A. and Mansinghka, Vikash K.},
title = {Temporally-reweighted {C}hinese restaurant process mixtures for clustering, imputing, and forecasting multivariate time series},
booktitle = {AISTATS 2018: Proceedings of the 21st International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = 84,
pages = {755--764},
publisher = {PMLR},
year = 2018,
keywords = {probabilistic inference, multivariate time series, nonparametric Bayes, structure learning},
url_paper = {http://proceedings.mlr.press/v84/saad18a/saad18a.pdf},
url_supplement = {http://proceedings.mlr.press/v84/saad18a/saad18a-supp.pdf},
url_link = {http://proceedings.mlr.press/v84/saad18a.html},
url_code = {https://github.com/probcomp/trcrpm},
abstract = {This article proposes a Bayesian nonparametric method for forecasting, imputation, and clustering in sparsely observed, multivariate time series data. The method is appropriate for jointly modeling hundreds of time series with widely varying, non-stationary dynamics. Given a collection of N time series, the Bayesian model first partitions them into independent clusters using a Chinese restaurant process prior. Within a cluster, all time series are modeled jointly using a novel “temporally-reweighted” extension of the Chinese restaurant process mixture. Markov chain Monte Carlo techniques are used to obtain samples from the posterior distribution, which are then used to form predictive inferences. We apply the technique to challenging forecasting and imputation tasks using seasonal flu data from the US Center for Disease Control and Prevention, demonstrating superior forecasting accuracy and competitive imputation accuracy as compared to multiple widely used baselines. We further show that the model discovers interpretable clusters in datasets with hundreds of time series, using macroeconomic data from the Gapminder Foundation.},
}
@inproceedings{towner2018proposals,
title = {Using probabilistic programs as proposals},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {Workshop on Probabilistic Programming Languages, Semantics, and Systems (co-located with POPL 2018)},
year = 2018,
url_paper = {https://arxiv.org/pdf/1801.03612.pdf},
url_link = {https://popl18.sigplan.org/event/pps-2018-using-probabilistic-programs-as-proposals},
keywords = {probabilistic programming, amortized inference, randomized algorithms, neural networks, proposal distribution, importance sampling, metropolis-hastings, auxiliary variable},
abstract = {Monte Carlo inference has asymptotic guarantees, but can be slow when using generic proposals. Handcrafted proposals that rely on user knowledge about the posterior distribution can be efficient, but are difficult to derive and implement. This paper proposes to let users express their posterior knowledge in the form of proposal programs, which are samplers written in probabilistic programming languages. One strategy for writing good proposal programs is to combine domain-specific heuristic algorithms with neural network models. The heuristics identify high probability regions, and the neural networks model the posterior uncertainty around the outputs of the algorithm. Proposal programs can be used as proposal distributions in importance sampling and Metropolis-Hastings samplers without sacrificing asymptotic consistency, and can be optimized offline using inference compilation. Support for optimizing and using proposal programs is easily implemented in a sampling-based probabilistic programming runtime. The paper illustrates the proposed technique with a proposal program that combines RANSAC and neural networks to accelerate inference in a Bayesian linear regression with outliers model.},
}
@inproceedings{towner2017aide,
title = {{AIDE}: An algorithm for measuring the accuracy of probabilistic inference algorithms},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {NIPS 2017: Advances in Neural Information Processing Systems 30},
year = 2017,
pages = {3004--3014},
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms.pdf},
url_supplement = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms-supplemental.zip},
url_link = {https://papers.nips.cc/paper/6893-aide-an-algorithm-for-measuring-the-accuracy-of-probabilistic-inference-algorithms},
url_code = {https://github.com/probcomp/nips2017-aide-experiments},
keywords = {probabilistic inference, sequential monte carlo, variational inference, approximate inference, kullback-leibler},
abstract = {Approximate probabilistic inference algorithms are central to many fields. Examples include sequential Monte Carlo inference in robotics, variational inference in machine learning, and Markov chain Monte Carlo inference in statistics. A key problem faced by practitioners is measuring the accuracy of an approximate inference algorithm on a specific dataset. This paper introduces the auxiliary inference divergence estimator (AIDE), an algorithm for measuring the accuracy of approximate inference algorithms. AIDE is based on the observation that inference algorithms can be treated as probabilistic models and the random variables used within the inference algorithm can be viewed as auxiliary variables. This view leads to a new estimator for the symmetric KL divergence between the output distributions of two inference algorithms. The paper illustrates application of AIDE to algorithms for inference in regression, hidden Markov, and Dirichlet process mixture models. The experiments show that AIDE captures the qualitative behavior of a broad class of inference algorithms and can detect failure modes of inference algorithms that are missed by standard heuristics.},
}
@inproceedings{saad2017dependencies,
author = {Feras Saad and Vikash Mansinghka},
title = {Detecting dependencies in sparse, multivariate databases using probabilistic programming and non-parametric {B}ayes},
booktitle = {AISTATS 2017: Proceedings of the 20th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = 54,
pages = {632--641},
publisher = {PMLR},
year = 2017,
address = {Fort Lauderdale, Florida},
url_paper = {http://proceedings.mlr.press/v54/saad17a/saad17a.pdf},
url_supplement = {http://proceedings.mlr.press/v54/saad17a/saad17a-supp.pdf},
url_link = {http://proceedings.mlr.press/v54/saad17a.html},
abstract = {Datasets with hundreds of variables and many missing values are commonplace. In this setting, it is both statistically and computationally challenging to detect true predictive relationships between variables and also to suppress false positives. This paper proposes an approach that combines probabilistic programming, information theory, and non-parametric Bayes. It shows how to use Bayesian non-parametric modeling to (i) build an ensemble of joint probability models for all the variables; (ii) efficiently detect marginal independencies; and (iii) estimate the conditional mutual information between arbitrary subsets of variables, subject to a broad class of constraints. Users can access these capabilities using BayesDB, a probabilistic programming platform for probabilistic data analysis, by writing queries in a simple, SQL-like language. This paper demonstrates empirically that the method can (i) detect context-specific (in)dependencies on challenging synthetic problems and (ii) yield improved sensitivity and specificity over baselines from statistics and machine learning, on a real-world database of over 300 sparsely observed indicators of macroeconomic development and public health.},
keywords = {dependence detection, probabilistic programming, nonparametric Bayes},
}
@article{schaechtle2017timeseries,
author = {Schaechtle, Ulrich and Saad, Feras and Radul, Alexey and Mansinghka, Vikash K.},
title = {Time series structure discovery via probabilistic program synthesis},
year = 2017,
journal = {arXiv preprint},
volume = {arXiv:1611.07051},
url_paper = {https://arxiv.org/pdf/1611.07051.pdf},
url_link = {https://arxiv.org/abs/1611.07051},
keywords = {probabilistic programming, program synthesis, structure discovery},
abstract = {There is a widespread need for techniques that can discover structure from time series data. Recently introduced techniques such as Automatic Bayesian Covariance Discovery (ABCD) provide a way to find structure within a single time series by searching through a space of covariance kernels that is generated using a simple grammar. While ABCD can identify a broad class of temporal patterns, it is difficult to extend and can be brittle in practice. This paper shows how to extend ABCD by formulating it in terms of probabilistic program synthesis. The key technical ideas are to (i) represent models using abstract syntax trees for a domain-specific probabilistic language, and (ii) represent the time series model prior, likelihood, and search strategy using probabilistic programs in a sufficiently expressive language. The final probabilistic program is written in under 70 lines of probabilistic code in Venture. The paper demonstrates an application to time series clustering that involves a non-parametric extension to ABCD, experiments for interpolation and extrapolation on real-world econometric data, and improvements in accuracy over both non-parametric and standard regression baselines.},
}
@article{saad2017search,
author = {Saad, Feras and Casarsa, Leonardo and Mansinghka, Vikash K.},
title = {Probabilistic search for structured data via probabilistic programming and nonparametric {B}ayes},
year = 2017,
journal = {arXiv preprint},
volume = {arXiv:1704.01087},
url_paper = {https://arxiv.org/pdf/1704.01087.pdf},
url_link = {https://arxiv.org/abs/1704.01087},
keywords = {probabilistic search, nonparametric Bayes},
abstract = {Databases are widespread, yet extracting relevant data can be difficult. Without substantial domain knowledge, multivariate search queries often return sparse or uninformative results. This paper introduces an approach for searching structured data based on probabilistic programming and nonparametric Bayes. Users specify queries in a probabilistic language that combines standard SQL database search operators with an information theoretic ranking function called predictive relevance. Predictive relevance can be calculated by a fast sparse matrix algorithm based on posterior samples from CrossCat, a nonparametric Bayesian model for high-dimensional, heterogeneously-typed data tables. The result is a flexible search technique that applies to a broad class of information retrieval problems, which we integrate into BayesDB, a probabilistic programming platform for probabilistic data analysis. This paper demonstrates applications to databases of US colleges, global macroeconomic indicators of public health, and classic cars. We found that human evaluators often prefer the results from probabilistic search to results from a standard baseline.},
}
@article{towner2017agents,
title = {Probabilistic programs for inferring the goals of autonomous agents},
author = {Cusumano-Towner, Marco F. and Radul, Alexey and Wingate, David and Mansinghka, Vikash K.},
journal = {arXiv preprint},
volume = {arXiv:1704.04977},
year = {2017},
url_paper = {https://arxiv.org/pdf/1704.04977.pdf},
url_link = {https://arxiv.org/abs/1704.04977},
keywords = {probabilistic goal inference, probabilistic programming, probabilistic robotics},
abstract = {Intelligent systems sometimes need to infer the probable goals of people, cars, and robots, based on partial observations of their motion. This paper introduces a class of probabilistic programs for formulating and solving these problems. The formulation uses randomized path planning algorithms as the basis for probabilistic models of the process by which autonomous agents plan to achieve their goals. Because these path planning algorithms do not have tractable likelihood functions, new inference algorithms are needed. This paper proposes two Monte Carlo techniques for these "likelihood-free" models, one of which can use likelihood estimates from neural networks to accelerate inference. The paper demonstrates efficacy on three simple examples, each using under 50 lines of probabilistic code.},
}
@inproceedings{towner2017encapsulating,
title = {Encapsulating models and approximate inference programs in probabilistic modules},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
booktitle = {Workshop on Probabilistic Programming Semantics (co-located with POPL 2017)},
year = 2017,
url_paper = {https://arxiv.org/pdf/1612.04759.pdf},
url_link = {http://pps2017.soic.indiana.edu/2017/01/13/encapsulating-models-and-approximate-inference-programs-in-probabilistic-modules/},
abstract = {This paper introduces the probabilistic module interface, which allows encapsulation of complex probabilistic models with latent variables alongside custom stochastic approximate inference machinery, and provides a platform-agnostic abstraction barrier separating the model internals from the host probabilistic inference system. The interface can be seen as a stochastic generalization of a standard simulation and density interface for probabilistic primitives. We show that sound approximate inference algorithms can be constructed for networks of probabilistic modules, and we demonstrate that the interface can be implemented using learned stochastic inference networks and MCMC and SMC approximate inference programs.},
}
@article{saeedi2017variational,
author = {Saeedi, Ardavan and Kulkarni, Tejas D. and Mansinghka, Vikash K. and Gershman, Samuel J.},
title = {Variational Particle Approximations},
journal = {Journal of Machine Learning Research},
volume = {18},
number = {69},
pages = {1--29},
year = 2017,
url_paper = {http://jmlr.org/papers/volume18/15-615/15-615.pdf},
url_link = {http://jmlr.org/papers/v18/15-615.html},
abstract = {Approximate inference in high-dimensional, discrete probabilistic models is a central problem in computational statistics and machine learning. This paper describes discrete particle variational inference (DPVI), a new approach that combines key strengths of Monte Carlo, variational and search-based techniques. DPVI is based on a novel family of particle-based variational approximations that can be fit using simple, fast, deterministic search techniques. Like Monte Carlo, DPVI can handle multiple modes, and yields exact results in a well-defined limit. Like unstructured mean-field, DPVI is based on optimizing a lower bound on the partition function; when this quantity is not of intrinsic interest, it facilitates convergence assessment and debugging. Like both Monte Carlo and combinatorial search, DPVI can take advantage of factorization, sequential structure, and custom search operators. This paper defines DPVI particle-based approximation family and partition function lower bounds, along with the sequential DPVI and local DPVI algorithm templates for optimizing them. DPVI is illustrated and evaluated via experiments on lattice Markov Random Fields, nonparametric Bayesian mixtures and block-models, and parametric as well as non-parametric hidden Markov models. Results include applications to real-world spike-sorting and relational modeling problems, and show that DPVI can offer appealing time/accuracy trade-offs as compared to multiple alternatives.},
}
@inproceedings{saad2016probabilistic,
title = {A probabilistic programming approach to probabilistic data analysis},
author = {Saad, Feras and Mansinghka Vikash K.},
booktitle = {NIPS 2016: Advances in Neural Information Processing Systems 29},
pages = {2011--2019},
year = 2016,
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/6060-a-probabilistic-programming-approach-to-probabilistic-data-analysis.pdf},
url_link = {https://papers.nips.cc/paper/6060-a-probabilistic-programming-approach-to-probabilistic-data-analysis},
abstract = {Probabilistic techniques are central to data analysis, but different approaches can be challenging to apply, combine, and compare. This paper introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include discriminative machine learning, hierarchical Bayesian models, multivariate kernel methods, clustering algorithms, and arbitrary probabilistic programs. We demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling definition language and structured query language. The practical value is illustrated in two ways. First, the paper describes an analysis on a database of Earth satellites, which identifies records that probably violate Kepler’s Third Law by composing causal probabilistic programs with nonparametric Bayes in 50 lines of probabilistic code. Second, it reports the lines of code and accuracy of CGPMs compared with baseline solutions from standard machine learning libraries.},
}
@inproceedings{schaechtle2016gp,
title = {Gaussian process structure learning via probabilistic inverse compilation},
author = {Schaechtle, Ulrich and Mansinghka, Vikash K.},
year = 2016,
booktitle = {Workshop on Interpretable Machine Learning in Complex Systems (co-located with NIPS 2016)},
url_paper = {https://arxiv.org/pdf/1611.07051v1.pdf},
}
@inproceedings{towner2016measuring,
title = {Measuring the non-asymptotic convergence of sequential Monte Carlo samplers using probabilistic programming},
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
year = 2016,
booktitle = {Workshop on Approximate Inference (co-located with NIPS 2016)},
url_paper = {https://arxiv.org/pdf/1612.02161v1.pdf},
}
@inproceedings{curlette2016monitoring,
title = {Monitoring the errors of discriminative models with probabilistic programming},
author = {Curlette, Christina and Schaechtle, Ulrich and Mansinghka, Vikash K.},
booktitle = {Workshop on Reliable Machine Learning in the Wild (co-located with NIPS 2016)},
year = 2016,
}
@article{mansinghka2016crosscat,
title = {Crosscat: A fully {B}ayesian, nonparametric method for analyzing heterogeneous, high-dimensional data},
author = {Mansinghka, Vikash K. and Shafto, Patrick and Jonas, Eric and Petschulat, Cap and Gasner Max and Tenenbaum, Joshua B.},
year = 2016,
journal = {Journal of Machine Learning Research},
volume = {17},
number = {138},
pages = {1-49},
url_paper = {http://jmlr.org/papers/volume17/11-392/11-392.pdf},
url_link = {http://jmlr.org/papers/v17/11-392.html},
abstract = {There is a widespread need for statistical methods that can analyze high-dimensional datasets without imposing restrictive or opaque modeling assumptions. This paper describes a domain-general data analysis method called CrossCat. CrossCat infers multiple non-overlapping views of the data, each consisting of a subset of the variables, and uses a separate nonparametric mixture to model each view. CrossCat is based on approximately Bayesian inference in a hierarchical, nonparametric model for data tables. This model consists of a Dirichlet process mixture over the columns of a data table in which each mixture component is itself an independent Dirichlet process mixture over the rows; the inner mixture components are simple parametric models whose form depends on the types of data in the table. CrossCat combines strengths of mixture modeling and Bayesian network structure learning. Like mixture modeling, CrossCat can model a broad class of distributions by positing latent variables, and produces representations that can be efficiently conditioned and sampled from for prediction. Like Bayesian networks, CrossCat represents the dependencies and independencies between variables, and thus remains accurate when there are multiple statistical signals. Inference is done via a scalable Gibbs sampling scheme; this paper shows that it works well in practice. This paper also includes empirical results on heterogeneous tabular data of up to 10 million cells, such as hospital cost and quality measures, voting records, unemployment rates, gene expression measurements, and images of handwritten digits. CrossCat infers structure that is consistent with accepted findings and common-sense knowledge in multiple domains and yields predictive accuracy competitive with generative, discriminative, and model-free alternatives.},
}
@article{saad2016analysis,
author = {Saad, Feras and Mansinghka, Vikash K.},
title = {Probabilistic data analysis with probabilistic programming},
year = 2016,
journal = {arXiv preprint},
volume = {arXiv:1608.05347},
url_paper = {https://arxiv.org/pdf/1608.05347.pdf},
url_link = {https://arxiv.org/abs/1608.05347},
abstract = {Probabilistic techniques are central to data analysis, but different approaches can be difficult to apply, combine, and compare. This paper introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include hierarchical Bayesian models, multivariate kernel methods, discriminative machine learning, clustering algorithms, dimensionality reduction, and arbitrary probabilistic programs. We also demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling language and a structured query language. The practical value is illustrated in two ways. First, CGPMs are used in an analysis that identifies satellite data records which probably violate Kepler’s Third Law, by composing causal probabilistic programs with non-parametric Bayes in under 50 lines of probabilistic code. Second, for several representative data analysis tasks, we report on lines of code and accuracy measurements of various CGPMs, plus comparisons with standard baseline solutions from Python and MATLAB libraries.},
}
@article{towner2016quantifying,
author = {Cusumano-Towner, Marco F. and Mansinghka, Vikash K.},
title = {Quantifying the probable approximation error of probabilistic inference programs},
year = 2016,
journal = {arXiv preprint},
volume = {arXiv:1606.00068},
url_paper = {https://arxiv.org/pdf/1606.00068.pdf},
url_link = {https://arxiv.org/abs/1606.00068},
abstract = {This paper introduces a new technique for quantifying the approximation error of a broad class of probabilistic inference programs, including ones based on both variational and Monte Carlo approaches. The key idea is to derive a subjective bound on the symmetrized KL divergence between the distribution achieved by an approximate inference program and its true target distribution. The bound’s validity (and subjectivity) rests on the accuracy of two auxiliary probabilistic programs: (i) a “reference” inference program that defines a gold standard of accuracy and (ii) a “meta-inference” program that answers the question “what internal random choices did the original approximate inference program probably make given that it produced a particular result?” The paper includes empirical results on inference problems drawn from linear regression, Dirichlet process mixture modeling, HMMs, and Bayesian networks. The experiments show that the technique is robust to the quality of the reference inference program and that it can detect implementation bugs that are not apparent from predictive performance.},
}
@mastersthesis{saad2016thesis,
author = {Saad, Feras A.},
title = {Probabilistic data analysis with probabilistic programming},
year = 2016,
school = {Massachusetts Institute of Technology},
url_link = {https://dspace.mit.edu/handle/1721.1/113164},
abstract = {Probabilistic techniques are central to data analysis, but dierent approaches can be challenging to apply, combine, and compare. This thesis introduces composable generative population models (CGPMs), a computational abstraction that extends directed graphical models and can be used to describe and compose a broad class of probabilistic data analysis techniques. Examples include hierarchical Bayesian models, multivariate kernel methods, discriminative machine learning, clustering algorithms, dimensionality reduction, and arbitrary probabilistic programs. We also demonstrate the integration of CGPMs into BayesDB, a probabilistic programming platform that can express data analysis tasks using a modeling language and a structured query language. The practical value is illustrated in two ways. First, CGPMs are used in an analysis that identifies satellite data records which probably violate Kepler's Third Law, by composing causal probabilistic programs with non-parametric Bayes in under 50 lines of probabilistic code. Second, for several representative data analysis tasks, we report on lines of code and accuracy measurements of various CGPMs, plus comparisons with standard baseline solutions from Python and MATLAB libraries.},
note = {MIT Charles and Jennifer Johnson Computer Science Master of Engineering Thesis Award, First Place.}
}
@mastersthesis{lu2016thesis,
author = {Lu, Anthony},
title = {Venture: An extensible platform for probabilistic meta-programming},
year = 2016,
school = {Massachusetts Institute of Technology},
url_link = {https://dspace.mit.edu/handle/1721.1/113160},
abstract = {This thesis describes Venture, an extensible platform for probabilistic meta-programming. In Venture, probabilistic generative models, probability density functions, and probabilistic inference algorithms are all firstclass objects. Any Venture program that makes random choices can be treated as a probabilistic model defined over the space of possible executions of the program. Such probabilistic model programs can also be run while recording the random choices that they make. Modeling and inference in Venture involves two additional classes of probabilistic programs. The first, probability density meta-programs partially describe the input-output behavior of probabilistic model programs. The second, stochastic inference meta-programs identify probable executions of model programs given stochastic constraints, and typically use density metaprograms as guides. Unlike other probabilistic programming platforms, Venture allows model programs, density meta-programs, and inference meta-programs to be written as user-space code in a single probabilistic programming language. Venture is essentially a Lisp-like higher-order language augmented with two novel abstractions: (i) probabilistic execution traces, a first-class object that represents the sequence of random choices that a probabilistic program makes, and (ii) stochastic procedures, which encapsulate the probabilistic programs and meta-programs needed to allow simple probability distributions, user-space VentureScript programs, and foreign probabilistic programs to be treated uniformly as components of probabilistic computations. Venture also provides runtime support for stochastic regeneration of execution trace fragments that makes use of the programs and meta-programs of all stochastic procedures invoked during the execution of the original traced program. This thesis describes a new prototype implementation of Venture incorporating these ideas and illustrates the flexibility of Venture by giving concise user-space implementations of primitives and inference strategies that have been built in to Church as well as other probabilistic languages.},
}
@inproceedings{kulkarni2015picture,
title = {Picture: A probabilistic programming language for scene perception},
author = {Kulkarni, Tejas and Kohli, Pushmeet and Tenenbaum, Joshua B. and Mansinghka, Vikash K.},
booktitle = {CVPR 2015: IEEE Conference on Computer Vision and Pattern Recognition},
pages = {4390--4399},
year = 2015,
publisher = {IEEE},
url_paper = {https://mrkulk.github.io/www_cvpr15/1999.pdf},
url_supplement = {https://mrkulk.github.io/www_cvpr15/1999-supp.zip},
abstract = {Recent progress on probabilistic modeling and statistical learning, coupled with the availability of large training datasets, has led to remarkable progress in computer vision. Generative probabilistic models, or “analysis-by-synthesis” approaches, can capture rich scene structure but have been less widely applied than their discriminative counterparts, as they often require considerable problem-specific engineering in modeling and inference, and inference is typically seen as requiring slow, hypothesize-and-test Monte Carlo methods. Here we present Picture, a probabilistic programming language for scene understanding that allows researchers to express complex generative vision models, while automatically solving them using fast general-purpose inference machinery. Picture provides a stochastic scene language that can express generative models for arbitrary 2D/3D scenes, as well as a hierarchy of representation layers for comparing scene hypotheses with observed images by matching not simply pixels, but also more abstract features (e.g., contours, deep neural network activations). Inference can flexibly integrate advanced Monte Carlo strategies with fast bottomup data-driven methods. Thus both representations and inference strategies can build directly on progress in discriminatively trained systems to make generative vision more robust and efficient. We use Picture to write programs for 3D face analysis, 3D human pose estimation, and 3D object reconstruction – each competitive with specially engineered baselines.},
}
@inproceedings{demeent2015particle,
author = {{van de Meent}, Jan-Willem and Yang, Hongseok and Mansinghka, Vikash K. and Wood, Frank},
title = {Particle {G}ibbs with ancestor sampling for probabilistic programs},
year = 2015,
booktitle = {AISTATS 2015: Proceedings of the 18th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {38},
pages = {986--994},
publisher = {PMLR},
url_paper = {http://www.jmlr.org/proceedings/papers/v38/vandemeent15.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v38/vandemeent15.html},
abstract = {Particle Markov chain Monte Carlo techniques rank among current state-of-the-art methods for probabilistic program inference. A drawback of these techniques is that they rely on importance resampling, which results in degenerate particle trajectories and a low effective sample size for variables sampled early in a program. We here develop a formalism to adapt ancestor resampling, a technique that mitigates particle degeneracy, to the probabilistic programming setting. We present empirical results that demonstrate nontrivial performance gains.},
}
@inproceedings{huggins2015jump,
title = {{JUMP}-means: Small variance asymptotics for {M}arkov jump processes},
author = {Huggins, Jonathan and Narasimhan, Karthik and Saeedi, Aradavan and Mansinghka, Vikash K.},
booktitle = {ICML 2015: International Conference on Machine Learning},
series = {Proceedings of Machine Learning Research},
volume = {37},
year = 2015,
pages = {693--701},
url_paper = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15.pdf},
url_supplement = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15-supp.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v37/hugginsa15.html},
abstract = {Markov jump processes (MJPs) are used to model a wide range of phenomena from disease progression to RNA path folding. However, maximum likelihood estimation of parametric models leads to degenerate trajectories and inferential performance is poor in nonparametric models. We take a small-variance asymptotics (SVA) approach to overcome these limitations. We derive the small-variance asymptotics for parametric and nonparametric MJPs for both directly observed and hidden state models. In the parametric case we obtain a novel objective function which leads to non-degenerate trajectories. To derive the nonparametric version we introduce the gamma-gamma process, a novel extension to the gamma-exponential process. We propose algorithms for each of these formulations, which we call JUMP-means. Our experiments demonstrate that JUMP-means is competitive with or outperforms widely used MJP inference approaches in terms of both speed and reconstruction accuracy.},
}
@article{mansinghka2015bayesdb,
title = {Bayes{DB}: A probabilistic programming system for querying the probable implications of data},
author = {Mansinghka, Vikash K. and Tibbetts, Richard and Baxter, Jay and Shafto, Pat and Eaves, Baxter},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1512.05006},
url_paper = {https://arxiv.org/pdf/1512.05006.pdf},
url_link = {https://arxiv.org/abs/1512.05006},
abstract = {Is it possible to make statistical inference broadly accessible to non-statisticians without sacrificing mathematical rigor or inference quality? This paper describes BayesDB, a probabilistic programming platform that aims to enable users to query the probable implications of their data as directly as SQL databases enable them to query the data itself. This paper focuses on four aspects of BayesDB: (i) BQL, an SQL-like query language for Bayesian data analysis, that answers queries by averaging over an implicit space of probabilistic models; (ii) techniques for implementing BQL using a broad class of multivariate probabilistic models; (iii) a semi-parametric Bayesian model-builder that auomatically builds ensembles of factorial mixture models to serve as baselines; and (iv) MML, a “meta-modeling” language for imposing qualitative constraints on the model-builder and combining baseline models with custom algorithmic and statistical models that can be implemented in external software. BayesDB is illustrated using three applications: cleaning and exploring a public database of Earth satellites; assessing the evidence for temporal dependence between macroeconomic indicators; and analyzing a salary survey.},
}
@article{schaechtle2015memoization,
title = {Probabilistic programming with {G}aussian process memoization},
author = {Schaechtle, Ulrich and Zinberg, Ben and Radul, Alexey and Stathis, Kostas and Mansinghka, Vikash K.},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1512.05665},
url_paper = {https://arxiv.org/pdf/1512.05665v2.pdf},
url_link = {https://arxiv.org/abs/1512.05665v2},
abstract = {Gaussian Processes (GPs) are widely used tools in statistics, machine learning, robotics, computer vision, and scientific computation. However, despite their popularity, they can be difficult to apply; all but the simplest classification or regression applications require specification and inference over complex covariance functions that do not admit simple analytical posteriors. This paper shows how to embed Gaussian processes in any higherorder probabilistic programming language, using an idiom based on memoization, and demonstrates its utility by implementing and extending classic and state-of-the-art GP applications. The interface to Gaussian processes, called gpmem, takes an arbitrary real-valued computational process as input and returns a statistical emulator that automatically improve as the original process is invoked and its input-output behavior is recorded. The flexibility of gpmem is illustrated via three applications: (i) Robust GP regression with hierarchical hyper-parameter learning, (ii) discovering symbolic expressions from time-series data by fully Bayesian structure learning over kernels generated by a stochastic grammar, and (iii) a bandit formulation of Bayesian optimization with automatic inference and action selection. All applications share a single 50-line Python library and require fewer than 20 lines of probabilistic code each.},
}
@article{chen2015sublinear,
title = {Sublinear-time approximate {MCMC} transitions for probabilistic programs},
author = {Chen, Yutian and Mansinghka, Vikash K. and Ghahramani, Zoubin},
year = 2015,
journal = {arXiv preprint},
volume = {arXiv:1411.1690},
url_paper = {https://arxiv.org/pdf/1411.1690.pdf},
url_link = {https://arxiv.org/abs/1411.1690},
abstract = {Probabilistic programming languages can simplify the development of machine learning techniques, but only if inference is sufficiently scalable. Unfortunately, Bayesian parameter estimation for highly coupled models such as regressions and state-space models still scales poorly; each MCMC transition takes linear time in the number of observations. This paper describes a sublinear-time algorithm for making Metropolis-Hastings (MH) updates to latent variables in probabilistic programs. The approach generalizes recently introduced approximate MH techniques: instead of subsampling data items assumed to be independent, it subsamples edges in a dynamically constructed graphical model. It thus applies to a broader class of problems and interoperates with other general-purpose inference techniques. Empirical results, including confirmation of sublinear per-transition scaling, are presented for Bayesian logistic regression, nonlinear classification via joint Dirichlet process mixtures, and parameter estimation for stochastic volatility models (with state estimation via particle MCMC). All three applications use the same implementation, and each requires under 20 lines of probabilistic code.},
}
@mastersthesis{zinberg2015thesis,
author = {Zinberg, Benjamin},
title = {Bayesian optimization as a probabilistic meta-program},
year = 2015,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/106374/967346485-MIT.pdf?sequence=1},
url_link = {https://dspace.mit.edu/handle/1721.1/106374},
abstract = {This thesis answers two questions: 1. How should probabilistic programming languages incorporate Gaussian processes? and 2. Is it possible to write a probabilistic meta-program for Bayesian optimization, a probabilistic meta-algorithm that can combine regression frameworks such as Gaussian processes with a broad class of parameter estimation and optimization techniques? We answer both questions affirmatively, presenting both an implementation and informal semantics for Gaussian process models in probabilistic programming systems, and a probabilistic meta-program for Bayesian optimization. The meta-program exposes modularity common to a wide range of Bayesian optimization methods in a way that is not apparent from their usual treatment in statistics.},
}
@inproceedings{wood2014anglican,
title = {A new approach to probabilistic programming inference},
author = {Wood, Frank and {van de Meent}, Jan-Willem and Mansinghka, Vikash K.},
booktitle = {AISTATS 2014: Proceedings of the 17th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {33},
year = 2014,
publisher = {PMLR},
pages = {1024--1032},
url_paper = {http://www.jmlr.org/proceedings/papers/v33/wood14.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v33/wood14.html},
abstract = {We introduce and demonstrate a new approach to inference in expressive probabilistic programming languages based on particle Markov chain Monte Carlo. Our approach is simple to implement and easy to parallelize. It applies to Turing-complete probabilistic programming languages and supports accurate inference in models that make use of complex control flow, including stochastic recursion. It also includes primitives from Bayesian nonparametric statistics. Our experiments show that this approach can be more efficient than previously introduced single-site Metropolis-Hastings methods.},
}
@inproceedings{saeedi2014automatic,
title = {Automatic inference for inverting software simulators via probabilistic programming},
author = {Saeedi,Ardavan and Firoiu, Vlad and Mansinghka, Vikash K.},
booktitle = {Workshop on Automatic Machine Learning (co-located with ICML 2014)},
year = 2014,
url_paper = {https://arxiv.org/pdf/1506.00308.pdf},
url_link = {https://arxiv.org/pdf/1506.00308.html},
abstract = {Models of complex systems are often formalized as sequential software simulators: computationally intensive programs that iteratively build up probable system configurations given parameters and initial conditions. These simulators enable modelers to capture effects that are difficult to characterize analytically or summarize statistically. However, in many realworld applications, these simulations need to be inverted to match the observed data. This typically requires the custom design, derivation and implementation of sophisticated inversion algorithms. Here we give a framework for inverting a broad class of complex software simulators via probabilistic programming and automatic inference, using under 20 lines of probabilistic code. Our approach is based on a formulation of inversion as approximate inference in a simple sequential probabilistic model. We implement four inference strategies, including Metropolis-Hastings, a sequentialized Metropolis-Hastings scheme, and a particle Markov chain Monte Carlo scheme, requiring 4 or fewer lines of probabilistic code each. We demonstrate our framework by applying it to invert a real geological software simulator from the oil and gas industry.},
}
@article{mansinghka2014venture,
title = {Venture: A higher-order probabilistic programming platform with programmable inference},
author = {Mansinghka, Vikash K. and Selsam, Daniel and Perov, Yura},
year = 2014,
journal = {arXiv preprint},
volume = {arXiv:1404.0099},
url_paper = {https://arxiv.org/pdf/1404.0099.pdf},
url_link = {https://arxiv.org/abs/1404.0099},
abstract = {We describe Venture, an interactive virtual machine for probabilistic programming that aims to be sufficiently expressive, extensible, and efficient for general-purpose use. Like Church, probabilistic models and inference problems in Venture are specified via a Turing-complete, higher-order probabilistic language descended from Lisp. Unlike Church, Venture also provides a compositional language for custom inference strategies, assembled from scalable implementations of several exact and approximate techniques. Venture is thus applicable to problems involving widely varying model families, dataset sizes and runtime/accuracy constraints. We also describe four key aspects of Venture’s implementation that build on ideas from probabilistic graphical models. First, we describe the stochastic procedure interface (SPI) that specifies and encapsulates primitive random variables, analogously to conditional probability tables in a Bayesian network. The SPI supports custom control flow, higher-order probabilistic procedures, partially exchangeable sequences and “likelihood-free” stochastic simulators, all with custom proposals. It also supports the integration of external models that dynamically create, destroy and perform inference over latent variables hidden from Venture. Second, we describe probabilistic execution traces (PETs), which represent execution histories of Venture programs. Like Bayesian networks, PETs capture conditional dependencies, but PETs also represent existential dependencies and exchangeable coupling. Third, we describe partitions of execution histories called scaffolds that can be efficiently constructed from PETs and that factor global inference problems into coherent sub-problems. Finally, we describe a family of stochastic regeneration algorithms for efficiently modifying PET fragments contained within scaffolds without visiting conditionally independent random choices. Stochastic regeneration insulates inference algorithms from the complexities introduced by changes in execution structure, with runtime that scales linearly in cases where previous approaches often scaled quadratically and were therefore impractical. We show how to use stochastic regeneration and the SPI to implement general-purpose inference strategies such as Metropolis-Hastings, Gibbs sampling, and blocked proposals based on hybrids with both particle Markov chain Monte Carlo and mean-field variational inference techniques.},
}
@article{mansinghka2014machines,
title = {Building fast {B}ayesian computing machines out of intentionally stochastic parts},
author = {Mansinghka, Vikash K. and Jonas, Eric},
year = 2014,
journal = {arXiv preprint},
volume = {arXiv:1402:4914},
url_paper = {http://probcomp.csail.mit.edu/wordpress/wp-content/uploads/2020/01/1402.4914_withSupplement_Jan20.pdf},
url_link = {http://probcomp.csail.mit.edu/wordpress/wp-content/uploads/2020/01/1402.4914_withSupplement_Jan20.pdf},
abstract = {The brain interprets ambiguous sensory information faster and more reliably than modern computers, using neurons that are slower and less reliable than logic gates. But Bayesian inference, which underpins many computational models of perception and cognition, appears computationally challenging even given modern transistor speeds and energy budgets. The computational principles and structures needed to narrow this gap are unknown. Here we show how to build fast Bayesian computing machines using intentionally stochastic, digital parts, narrowing this efficiency gap by multiple orders of magnitude. We find that by connecting stochastic digital components according to simple mathematical rules, one can build massively parallel, low precision circuits that solve Bayesian inference problems and are compatible with the Poisson firing statistics of cortical neurons. We evaluate circuits for depth and motion perception, perceptual learning and causal reasoning, each performing inference over 10,000+ latent variables in real time — a 1,000x speed advantage over commodity microprocessors. These results suggest a new role for randomness in the engineering and reverse-engineering of intelligent computation.},
}
@inproceedings{mansinghka2013gpgp,
title = {Approximate {B}ayesian image interpretation using generative probabilistic graphics programs},
author = {Mansinghka, Vikash K. and Kulkarni, Tejas and Perov, Yura and Tenenbaum, Joshua B.},
booktitle = {NIPS 2013: Advances in Neural Information Processing Systems 26},
year = 2013,
pages = {1520--1528},
publisher = {Curran Associates},
url_paper = {http://papers.nips.cc/paper/4881-approximate-bayesian-image-interpretation-using-generative-probabilistic-graphics-programs.pdf},
url_link = {http://papers.nips.cc/paper/4881-approximate-bayesian-image-interpretation-using-generative-probabilistic-graphics-programs},
abstract = {The idea of computer vision as the Bayesian inverse problem to computer graphics has a long history and an appealing elegance, but it has proved difficult to directly implement. Instead, most vision tasks are approached via complex bottom-up processing pipelines. Here we show that it is possible to write short, simple probabilistic graphics programs that define flexible generative models and to automatically invert them to interpret real-world images. Generative probabilistic graphics programs (GPGP) consist of a stochastic scene generator, a renderer based on graphics software, a stochastic likelihood model linking the renderer’s output and the data, and latent variables that adjust the fidelity of the renderer and the tolerance of the likelihood. Representations and algorithms from computer graphics are used as the deterministic backbone for highly approximate and stochastic generative models. This formulation combines probabilistic programming, computer graphics, and approximate Bayesian computation, and depends only on generalpurpose, automatic inference techniques. We describe two applications: reading sequences of degraded and adversarially obscured characters, and inferring 3D road models from vehicle-mounted camera images. Each of the probabilistic graphics programs we present relies on under 20 lines of probabilistic code, and yields accurate, approximately Bayesian inferences about real-world images.},
}
@inproceedings{deka2013power,
title = {Markov chain algorithms: A template for building future robust low power systems},
author = {Deka, Biplab and Birklykke, Alex and Duwe, Henry and Mansinghka, Vikash K. and Kumar, Rakesh},
booktitle = {ACSSC 2013: Proceedings of the 47th Annual Asilomar Conference on Signals, Systems, and Computers},
year = 2013,
publisher = {IEEE},
url_paper = {http://rakeshk.crhc.illinois.edu/asilomar13_cam.pdf},
abstract = {Although computational systems are looking towards post CMOS devices in the pursuit of lower power, the inherent unreliability of such devices makes it difficult to design robust systems without additional power overheads for guaranteeing robustness. As such, algorithmic structures with inherent ability to tolerate computational errors are of significant interest. We propose to cast applications as stochastic algorithms based on Markov chains as such algorithms are both sufficiently general and tolerant to transition errors. We show with four example applications - boolean satisfiability (SAT), sorting, LDPC decoding and clustering - how applications can be cast as Markov Chain algorithms. Using algorithmic fault injection techniques, we demonstrate the robustness of these implementations to transition errors with high error rates. Based on these results, we make a case for using Markov Chains as an algorithmic template for future robust low power systems.},
}
@article{sanborn2013intuitive,
title = {Reconciling intuitive physics and {N}ewtonian mechanics for colliding objects},
author = {Sanborn, Adam and Mansinghka, Vikash K. and Griffiths, Thomas},
journal = {Psychological Review},
volume = {120},
number = {2},
pages = {411-437},
year = 2013,
url_paper = {https://cocosci.berkeley.edu/tom/papers/collisionPsychReview_inpress.pdf},
url_link = {https://www.ncbi.nlm.nih.gov/pubmed/23458084},
abstract = {People have strong intuitions about the influence objects exert upon one another when they collide. Because people’s judgments appear to deviate from Newtonian mechanics, psychologists have suggested that people depend on a variety of task-specific heuristics. This leaves open the question of how these heuristics could be chosen, and how to integrate them into a unified model that can explain human judgments across a wide range of physical reasoning tasks. We propose an alternative framework, in which people’s judgments are based on optimal statistical inference over a Newtonian physical model that incorporates sensory noise and intrinsic uncertainty about the physical properties of the objects being viewed. This “noisy Newton” framework can be applied to a multitude of judgments, with people’s answers determined by the uncertainty they have for physical variables and the constraints of Newtonian mechanics. We investigate a range of effects in mass judgments that have previously been taken as strong evidence for heuristic use and show that they are well explained by the interplay between Newtonian constraints and sensory uncertainty. We also consider an extended model that handles causality judgments, and obtain good quantitative agreement with human judgments across tasks that involve different judgment types with a single consistent set of parameters.},
}
@article{shafto2011crosscat,
title = {A probabilistic model of cross-categorization},
author = {Shafto, Patrick and Kemp, Charles and Mansinghka, Vikash K. and Tenenbaum, Joshua B.},
journal = {Cognition},
volume = {120},
number = 1,
pages = {1--25},
year = 2011,
url_paper = {http://www.psy.cmu.edu/~ckemp/papers/shaftokmt11_aprobabilisticmodelofcrosscategorization.pdf},
abstract = {Most natural domains can be represented in multiple ways: we can categorize foods in terms of their nutritional content or social role, animals in terms of their taxonomic groupings or their ecological niches, and musical instruments in terms of their taxonomic categories or social uses. Previous approaches to modeling human categorization have largely ignored the problem of cross-categorization, focusing on learning just a single system of categories that explains all of the features. Cross-categorization presents a difficult problem: how can we infer categories without first knowing which features the categories are meant to explain? We present a novel model that suggests that human cross-categorization is a result of joint inference about multiple systems of categories and the features that they explain. We also formalize two commonly proposed alternative explanations for cross-categorization behavior: a features-first and an objects-first approach. The features-first approach suggests that cross-categorization is a consequence of attentional processes, where features are selected by an attentional mechanism first and categories are derived second. The objects-first approach suggests that cross-categorization is a consequence of repeated, sequential attempts to explain features, where categories are derived first, then features that are poorly explained are recategorized. We present two sets of simulations and experiments testing the models’ predictions about human categorization. We find that an approach based on joint inference provides the best fit to human categorization behavior, and we suggest that a full account of human category learning will need to incorporate something akin to these capabilities.},
}
@inproceedings{freer2010computationally,
title = {When are probabilistic programs probably computationally tractable?},
author = {Freer, Cameron and Mansinghka, Vikash K. and Roy, Daniel},
booktitle = {Workshop on Monte Carlo Methods for Modern Applications (co-located with NIPS 2010)},
year = 2010,
url_paper = {http://danroy.org/papers/FreerManRoy-NIPSMC-2010.pdf},
abstract = {We study the computational complexity of Bayesian inference through the lens of simulating probabilistic programs. Our approach is grounded in new conceptual hypotheses about some intrinsic drivers of computational complexity, drawn from the experience of computational Bayesian statisticians. We sketch a research program to address two issues, via a combination of theory and experiments: (a) What conditions suffice for Bayesian inference by posterior simulation to be computationally efficient? (b) How should probabilistic programmers write their probabilistic programs so as to maximize their probable tractability? The research program we articulate, if successful, may help to explain the gap between the unreasonable effectiveness of some simple, widely-used sampling algorithms and the classical study of reductions to Bayes from hard problems of deduction, arithmetic, and optimization.},
}
@inproceedings{mansinghka2009systematic,
title = {Exact and approximate sampling by systematic stochastic search},
author = {Mansinghka, Vikash K. and Roy, Daniel and Jonas, Eric and Tenenbaum, Joshua B.},
booktitle = {AISTATS 2009: Proceedings of the 12th International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {5},
pages = {400--407},
year = 2009,
publisher = {PMLR},
url_paper = {http://web.mit.edu/cocosci/Papers/sss_camera.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v5/mansinghka09a.html},
abstract = {We introduce adaptive sequential rejection sampling, an algorithm for generating exact samples from high-dimensional, discrete distributions, building on ideas from classical AI search. Just as systematic search algorithms like A* recursively build complete solutions from partial solutions, sequential rejection sampling recursively builds exact samples over high-dimensional spaces from exact samples over lower-dimensional subspaces. Our algorithm recovers widely-used particle filters as an approximate variant without adaptation, and a randomized version of depth first search with backtracking when applied to deterministic problems. In this paper, we present the mathematical and algorithmic underpinnings of our approach and measure its behavior on undirected and directed graphical models, obtaining exact and approximate samples in a range of situations.},
}
@inproceedings{sanborn2009intuitive,
title = {A {B}ayesian framework for modeling intuitive dynamics},
author = {Sanborn, Adam and Mansinghka, Vikash K. and Griffiths, Thomas},
booktitle = {COGSCI 2009: Proceedings of the 31st Annual Conference of the Cognitive Science Society},
year = 2009,
url_paper = {https://cocosci.berkeley.edu/tom/papers/collisions.pdf},
abstract = {People have strong intuitions about the masses of objects and the causal forces that they exert upon one another. These intuitions have been explored through a variety of tasks, in particular judging the relative masses of objects involved in collisions and evaluating whether one object caused another to move. We present a single framework for explaining two types of judgments that people make about the dynamics of objects, based on Bayesian inference. In this framework, we define a particular model of dynamics – essentially Newtonian physics plus Gaussian noise – which makes predictions about the trajectories of objects following collisions based on their masses. By applying Bayesian inference, it becomes possible to reason from trajectories back to masses, and to reason about whether one object caused another to move. We use this framework to predict human causality judgments using data collected from a mass judgment task.},
}
@inproceedings{mansinghka2009crosscat,
title = {Cross-categorization: A method for discovering multiple overlapping clusterings},
author = {Mansinghka, Vikash K. and Jonas, Eric and Petschulat, Cap and Cronin, Beau and Shafto, Pat and Tenenbaum, Joshua B.},
booktitle = {Workshop on Nonparametric Bayes (co-located with NIPS 2009)},
year = 2009,
url_paper = {http://web.mit.edu/vkm/www/crosscat.pdf},
abstract = {Model-based clustering techniques, including inference in Dirichlet process mixture models, have difficulty when different dimensions are best explained by very different clusterings. We introduce cross-categorization, an unsupervised learning technique that overcomes this basic limitation. Based on MCMC inference in a novel nonparametric Bayesian model, cross-categorization automatically discovers the number of independent nonparametric Bayesian models needed to explain the data, using a separate Dirichlet process mixture model for each group in an inferred partition of the dimensions. Unlike a DP mixture, our model is exchangeable over both the rows of a heterogeneous data array (the samples) and the columns (new dimensions), and can model any dataset as the number of samples and dimensions both go to infinity. We demonstrate the efficiency and robustness of our algorithm, including experiments on the full Dartmouth Health Atlas dataset without any preprocessing, showing that it finds veridical causal structure.},
}
@inproceedings{goodman2008church,
author = {Goodman, Noah and Mansinghka, Vikash K. and Roy, Daniel and Bonawitz, Keith and Tenenbaum, Joshua B.},
title = {Church: A universal language for generative models},
booktitle = {UAI 2008: Proceedings of the 24th Conference on Uncertainty in Artificial Intelligence},
pages = {220--229},
year = 2008,
publisher = {AUAI Press},
url_paper = {https://arxiv.org/pdf/1206.3255v2.pdf},
url_link = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1346&proceeding_id=24},
abstract = {Formal languages for probabilistic modeling enable re-use, modularity, and descriptive clarity, and can foster generic inference techniques. We introduce Church, a universal language for describing stochastic generative processes. Church is based on the Lisp model of lambda calculus, containing a pure Lisp as its deterministic subset. The semantics of Church is defined in terms of evaluation histories and conditional distributions on such histories. Church also includes a novel language construct, the stochastic memoizer, which enables simple description of many complex non-parametric models. We illustrate language features through several examples, including: a generalized Bayes net in which parameters cluster over trials, infinite PCFGs, planning by inference, and various non-parametric clustering models. Finally, we show how to implement query on any Church program, exactly and approximately, using Monte Carlo techniques.},
}
@inproceedings{roy2008stochastic,
title = {A stochastic programming perspective on nonparametric {B}ayes},
author = {Roy, Daniel and Mansinghka, Vikash K. and Goodman, Noah and Tenenbaum Joshua B.},
booktitle = {Workshop on Nonparametric Bayes (co-located with ICML 2008)},
year = 2008,
url_paper = {http://danroy.org/papers/RoyManGooTen-ICMLNPB-2008.pdf},
abstract = {We use Church, a Turing-universal language for stochastic generative processes and the probability distributions they induce, to study and extend several objects in nonparametric Bayesian statistics. We connect exchangeability and de Finetti measures with notions of purity and closures from functional programming. We exploit delayed evaluation to provide finite, machine-executable representations for various nonparametric Bayesian objects. We relate common uses of the Dirichlet process to a stochastic generalization of memoization, and use this abstraction to compactly describe and extend several nonparametric models. Finally, we briefly discuss issues of computability and inference.},
}
@techreport{mansinghka2008stochastic,
author = {Mansinghka, Vikash K. and Jonas, Eric and Tenenbaum, Joshua B.},
title = {Stochastic digital circuits for probabilistic inference},
year = 2008,
number = {MIT-CSAIL-TR-2008-069},
institution = {Computer Science and Artificial Intelligence Laboratory},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/43712/MIT-CSAIL-TR-2008-069.pdf},
url_link = {https://dspace.mit.edu/handle/1721.1/43712},
abstract = {We introduce combinational stochastic logic, an abstraction that generalizes deterministic digital circuit design (based on Boolean logic gates) to the probabilistic setting. We show how this logic can be combined with techniques from contemporary digital design to generate stateless and stateful circuits for exact and approximate sampling from a range of probability distributions. We focus on Markov chain Monte Carlo algorithms for Markov random fields, using massively parallel circuits. We implement these circuits on commodity reconfigurable logic and estimate the resulting performance in time, space and price. Using our approach, these simple and general algorithms could be affordably run for thousands of iterations on models with hundreds of thousands of variables in real time},
}
@phdthesis{mansinghka2009thesis,
title = {Natively probabilistic computation},
author = {Mansinghka, Vikash K.},
year = 2009,
school = {Massachusetts Institute of Technology},
url_paper = {http://web.mit.edu/vkm/www/vkm-dissertation.pdf},
url_link = {http://hdl.handle.net/1721.1/47892},
note = {Winner of the Geroge M. Sprowls dissertation award.},
}
@mastersthesis{mansinghka2009meng,
title = {Nonparametric {B}ayesian models for supervised and unsupervised learning},
author = {Mansinghka, Vikash K.},
year = 2009,
school = {Massachusetts Institute of Technology},
url_paper = {https://dspace.mit.edu/bitstream/handle/1721.1/53172/517976994-MIT.pdf?sequence=2},
url_link = {https://dspace.mit.edu/handle/1721.1/53172},
abstract = {I introduce two nonparametric Bayesian methods for solving problems of supervised and unsupervised learning. The first method simultaneously learns causal networks and causal theories from data. For example, given synthetic co-occurrence data from a simple causal model for the medical domain, it can learn relationships like "having a flu causes coughing", while also learning that observable quantities can be usefully grouped into categories like diseases and symptoms, and that diseases tend to cause symptoms, not the other way around. The second method is an online algorithm for learning a prototype-based model for categorial concepts, and can be used to solve problems of multiclass classification with missing features. I apply it to problems of categorizing newsgroup posts and recognizing handwritten digits. These approaches were inspired by a striking capacity of human learning, which shouldalso be a desideratum for any intelligent system: the ability to learn certain kinds of "simple" or "natural" structures very quickly, while still being able to learn arbitrary - and arbitrarily complex - structures given enough data. In each case, I show how nonparametric Bayesian modeling and inference based on stochastic simulation give us some of the tools we need to achieve this goal.},
}
@inproceedings{goodman2007learning,
title = {Learning grounded causal models},
author = {Goodman, Noah and Mansinghka, Vikash K. and Tenenbaum, Joshua B},
year = 2007,
booktitle = {COGSCI 2007: Proceedings of the 29th Annual Conference of the Cognitive Science Society},
pages = {305},
url_paper = {https://cocolab.stanford.edu/papers/GoodmanEtAl2007-Cogsci.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.422.2173},
note = {Winner of the Cognitive Science Society Computational Modeling Prize for Perception and Action.},
abstract = {We address the problem of learning grounded causal models: systems of concepts that are connected by causal relations and explicitly grounded in perception. We present a Bayesian framework for learning these models—both a causal Bayesian network structure over variables and the consequential region of each variable in perceptual space—from dynamic perceptual evidence. Using a novel experimental paradigm we show that humans are able to learn grounded causal models, and that the Bayesian model accounts well for human performance.},
}
@inproceedings{goldwater2007modeling,
title = {Modeling human performance in statistical word segmentation},
author = {Goldwater, Frank and Mansinghka, Vikash K. and Griffiths, Thomas and Tenenbaum, Joshua B.},
booktitle = {COGSCI 2007: Proceedings of the 29th Annual Conference of the Cognitive Science Society},
year = 2007,
pages = {281},
url_paper = {http://csjarchive.cogsci.rpi.edu/proceedings/2007/docs/p281.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.124.6889},
abstract = {What mechanisms support the ability of human infants, adults, and other primates to identify words from fluent speech using distributional regularities? In order to better characterize this ability, we collected data from adults in an artificial language segmentation task similar to Saffran, Newport, and Aslin (1996) in which the length of sentences was systematically varied between groups of participants. We then compared the fit of a variety of computational models— including simple statistical models of transitional probability and mutual information, a clustering model based on mutual information by Swingley (2005), PARSER (Perruchet & Vintner, 1998), and a Bayesian model. We found that while all models were able to successfully complete the task, fit to the human data varied considerably, with the Bayesian model achieving the highest correlation with our results.},
}
@inproceedings{mansinghka2007aclass,
author = {Mansinghka, Vikash K. and Roy, Daniel and Rifkin, Ryan and Tenenbaum, Joshua B.},
title = {A{C}lass: An online algorithm for generative classification},
booktitle = {AISTATS 2007: Artificial Intelligence and Statistics 11},
volume = {2},
pages = {315--322},
year = 2007,
publisher = {PMLR},
url_paper = {http://www.jmlr.org/proceedings/papers/v2/mansinghka07a/mansinghka07a.pdf},
url_link = {http://www.jmlr.org/proceedings/papers/v2/mansinghka07a.html},
abstract = {We present AClass, a simple, online, parallelizable algorithm for supervised multiclass classification. AClass models each class conditional density as a Chinese restaurant process mixture, and performs approximate inference in this model using a sequential Monte Carlo scheme. AClass combines several strengths of previous approaches to classification that are not typically found in a single algorithm; it supports learning from missing data and yields sensibly regularized nonlinear decision boundaries while remaining computationally efficient. We compare AClass to several standard classification algorithms and show competitive performance.},
}
@inproceedings{mansinghka2006structured,
author = {Mansinghka, Vikash K. and Kemp, Charles and Tenenbaum, Joshua B.},
title = {Structured priors for structure learning},
year = 2006,
booktitle = {(UAI 2006): Uncertainty in Artificial Intelligence 22},
pages = {324--331},
url_paper = {https://dslpitt.org/uai/papers/06/p324-mansinghka.pdf},
url_link = {https://dslpitt.org/uai/displayArticleDetails.jsp?mmnu=1&smnu=2&article_id=1314&proceeding_id=22},
abstract = {Traditional approaches to Bayes net structure learning typically assume little regularity in graph structure other than sparseness. However, in many cases, we expect more systematicity: variables in real-world systems often group into classes that predict the kinds of probabilistic dependencies they participate in. Here we capture this form of prior knowledge in a hierarchical Bayesian framework, and exploit it to enable structure learning and type discovery from small datasets. Specifically, we present a nonparametric generative model for directed acyclic graphs as a prior for Bayes net structure learning. Our model assumes that variables come in one or more classes and that the prior probability of an edge existing between two variables is a function only of their classes. We derive an MCMC algorithm for simultaneous inference of the number of classes, the class assignments of variables, and the Bayes net structure over variables. For several realistic, sparse datasets, we show that the bias towards systematicity of connections provided by our model can yield more accurate learned networks than the traditional approach of using a uniform prior, and that the classes found by our model are appropriate.},
}
@inproceedings{roy2006learning,
title = {Learning annotated hierarchies from relational data},
author = {Roy, Daniel and Kemp, Charles and Mansinghka, Vikash K. and Tenenbaum, Joshua B.},
booktitle = {NIPS 2006: Advances in Neural Information Processing Systems 19},
pages = {1185--1192},
year = 2006,
publisher = {Curran Associates},
url_paper = {https://papers.nips.cc/paper/3077-learning-annotated-hierarchies-from-relational-data.pdf},
url_link = {https://papers.nips.cc/paper/3077-learning-annotated-hierarchies-from-relational-data},
abstract = {The objects in many real-world domains can be organized into hierarchies, where each internal node picks out a category of objects. Given a collection of features and relations defined over a set of objects, an annotated hierarchy includes a specification of the categories that are most useful for describing each individual feature and relation. We define a generative model for annotated hierarchies and the features and relations that they describe, and develop a Markov chain Monte Carlo scheme for learning annotated hierarchies. We show that our model discovers interpretable structure in several real-world data sets.},
}
@inproceedings{shafto2006learning,
title = {Learning cross-cutting systems of categories},
author = {Shafto, Pat and Kemp, Charles and Mansinghka, Vikash K. and Gordon, Matthew and Tenenbaum, Joshua B.},
booktitle = {NIPS 2006: Proceedings of the 28th Annual Conference of the Cognitive Science Society},
pages = {2146--2151},
year = 2006,
url_paper = {http://csjarchive.cogsci.rpi.edu/Proceedings/2006/docs/p2146.pdf},
url_link = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.183.9893},
abstract = {Most natural domains can be represented in multiple ways: animals may be thought of in terms of their taxonomic groupings or their ecological niches and foods may be thought of in terms of their nutritional content or social role. We present a computational framework that discovers multiple systems of categories given information about a domain of objects and their properties. Each system of object categories accounts for a distinct and coherent subset of the features. A first experiment shows that our CrossCat model predicts human learning in an artificial category learning task. A second experiment shows that the model discovers important structure in two real-world domains. Traditional models of categorization usually search for a single system of categories: we suggest that these models do not predict human performance in our task, and miss important structure in our real world examples.},
}
@inproceedings{goodman2006intuitive,
title = {Intuitive theories of mind: A rational approach to false belief},
author = {Goodman, Noah and Baker, Chris and Bonawitz, Elizabeth and Mansinghka, Vikash K. and Gopnik, Alison and Wellman, Henry and Schulz, Laura and Tenenbaum, Joshua B.},
booktitle = {COGSCI 2006: Proceedings of the 28th Annual Conference of the Cognitive Science Society},
pages = {1382--1387},
year = 2006,
url_paper = {http://web.mit.edu/cocosci/Papers/pos785-goodman.pdf},
url_link = {http://web.mit.edu/cocosci/Papers/pos785-goodman},
}