\documentclass[12pt,titlepage]{article} \usepackage{amsmath} \usepackage{mathrsfs} \usepackage{amsfonts} \usepackage{amssymb} \usepackage{amsthm} \usepackage{mathtools} \usepackage{graphicx} \usepackage{color} \usepackage{ucs} \usepackage[utf8x]{inputenc} \usepackage{xparse} \usepackage{hyperref} %----Macros---------- % % Unresolved issues: % % \righttoleftarrow % \lefttorightarrow % % \color{} with HTML colorspec % \bgcolor % \array with options (without options, it's equivalent to the matrix environment) % Of the standard HTML named colors, white, black, red, green, blue and yellow % are predefined in the color package. Here are the rest. \definecolor{aqua}{rgb}{0, 1.0, 1.0} \definecolor{fuschia}{rgb}{1.0, 0, 1.0} \definecolor{gray}{rgb}{0.502, 0.502, 0.502} \definecolor{lime}{rgb}{0, 1.0, 0} \definecolor{maroon}{rgb}{0.502, 0, 0} \definecolor{navy}{rgb}{0, 0, 0.502} \definecolor{olive}{rgb}{0.502, 0.502, 0} \definecolor{purple}{rgb}{0.502, 0, 0.502} \definecolor{silver}{rgb}{0.753, 0.753, 0.753} \definecolor{teal}{rgb}{0, 0.502, 0.502} % Because of conflicts, \space and \mathop are converted to % \itexspace and \operatorname during preprocessing. % itex: \space{ht}{dp}{wd} % % Height and baseline depth measurements are in units of tenths of an ex while % the width is measured in tenths of an em. \makeatletter \newdimen\itex@wd% \newdimen\itex@dp% \newdimen\itex@thd% \def\itexspace#1#2#3{\itex@wd=#3em% \itex@wd=0.1\itex@wd% \itex@dp=#2ex% \itex@dp=0.1\itex@dp% \itex@thd=#1ex% \itex@thd=0.1\itex@thd% \advance\itex@thd\the\itex@dp% \makebox[\the\itex@wd]{\rule[-\the\itex@dp]{0cm}{\the\itex@thd}}} \makeatother % \tensor and \multiscript \makeatletter \newif\if@sup \newtoks\@sups \def\append@sup#1{\edef\act{\noexpand\@sups={\the\@sups #1}}\act}% \def\reset@sup{\@supfalse\@sups={}}% \def\mk@scripts#1#2{\if #2/ \if@sup ^{\the\@sups}\fi \else% \ifx #1_ \if@sup ^{\the\@sups}\reset@sup \fi {}_{#2}% \else \append@sup#2 \@suptrue \fi% \expandafter\mk@scripts\fi} \def\tensor#1#2{\reset@sup#1\mk@scripts#2_/} \def\multiscripts#1#2#3{\reset@sup{}\mk@scripts#1_/#2% \reset@sup\mk@scripts#3_/} \makeatother % \slash \makeatletter \newbox\slashbox \setbox\slashbox=\hbox{$/$} \def\itex@pslash#1{\setbox\@tempboxa=\hbox{$#1$} \@tempdima=0.5\wd\slashbox \advance\@tempdima 0.5\wd\@tempboxa \copy\slashbox \kern-\@tempdima \box\@tempboxa} \def\slash{\protect\itex@pslash} \makeatother % math-mode versions of \rlap, etc % from Alexander Perlis, "A complement to \smash, \llap, and lap" % http://math.arizona.edu/~aprl/publications/mathclap/ \def\clap#1{\hbox to 0pt{\hss#1\hss}} \def\mathllap{\mathpalette\mathllapinternal} \def\mathrlap{\mathpalette\mathrlapinternal} \def\mathclap{\mathpalette\mathclapinternal} \def\mathllapinternal#1#2{\llap{$\mathsurround=0pt#1{#2}$}} \def\mathrlapinternal#1#2{\rlap{$\mathsurround=0pt#1{#2}$}} \def\mathclapinternal#1#2{\clap{$\mathsurround=0pt#1{#2}$}} % Renames \sqrt as \oldsqrt and redefine root to result in \sqrt[#1]{#2} \let\oldroot\root \def\root#1#2{\oldroot #1 \of{#2}} \renewcommand{\sqrt}[2][]{\oldroot #1 \of{#2}} % Manually declare the txfonts symbolsC font \DeclareSymbolFont{symbolsC}{U}{txsyc}{m}{n} \SetSymbolFont{symbolsC}{bold}{U}{txsyc}{bx}{n} \DeclareFontSubstitution{U}{txsyc}{m}{n} % Manually declare the stmaryrd font \DeclareSymbolFont{stmry}{U}{stmry}{m}{n} \SetSymbolFont{stmry}{bold}{U}{stmry}{b}{n} % Manually declare the MnSymbolE font \DeclareFontFamily{OMX}{MnSymbolE}{} \DeclareSymbolFont{mnomx}{OMX}{MnSymbolE}{m}{n} 
\SetSymbolFont{mnomx}{bold}{OMX}{MnSymbolE}{b}{n} \DeclareFontShape{OMX}{MnSymbolE}{m}{n}{ <-6> MnSymbolE5 <6-7> MnSymbolE6 <7-8> MnSymbolE7 <8-9> MnSymbolE8 <9-10> MnSymbolE9 <10-12> MnSymbolE10 <12-> MnSymbolE12}{} % Declare specific arrows from txfonts without loading the full package \makeatletter \def\re@DeclareMathSymbol#1#2#3#4{% \let#1=\undefined \DeclareMathSymbol{#1}{#2}{#3}{#4}} \re@DeclareMathSymbol{\neArrow}{\mathrel}{symbolsC}{116} \re@DeclareMathSymbol{\neArr}{\mathrel}{symbolsC}{116} \re@DeclareMathSymbol{\seArrow}{\mathrel}{symbolsC}{117} \re@DeclareMathSymbol{\seArr}{\mathrel}{symbolsC}{117} \re@DeclareMathSymbol{\nwArrow}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\nwArr}{\mathrel}{symbolsC}{118} \re@DeclareMathSymbol{\swArrow}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\swArr}{\mathrel}{symbolsC}{119} \re@DeclareMathSymbol{\nequiv}{\mathrel}{symbolsC}{46} \re@DeclareMathSymbol{\Perp}{\mathrel}{symbolsC}{121} \re@DeclareMathSymbol{\Vbar}{\mathrel}{symbolsC}{121} \re@DeclareMathSymbol{\sslash}{\mathrel}{stmry}{12} \re@DeclareMathSymbol{\bigsqcap}{\mathop}{stmry}{"64} \re@DeclareMathSymbol{\biginterleave}{\mathop}{stmry}{"6} \re@DeclareMathSymbol{\invamp}{\mathrel}{symbolsC}{77} \re@DeclareMathSymbol{\parr}{\mathrel}{symbolsC}{77} \makeatother % \llangle, \rrangle, \lmoustache and \rmoustache from MnSymbolE \makeatletter \def\Decl@Mn@Delim#1#2#3#4{% \if\relax\noexpand#1% \let#1\undefined \fi \DeclareMathDelimiter{#1}{#2}{#3}{#4}{#3}{#4}} \def\Decl@Mn@Open#1#2#3{\Decl@Mn@Delim{#1}{\mathopen}{#2}{#3}} \def\Decl@Mn@Close#1#2#3{\Decl@Mn@Delim{#1}{\mathclose}{#2}{#3}} \Decl@Mn@Open{\llangle}{mnomx}{'164} \Decl@Mn@Close{\rrangle}{mnomx}{'171} \Decl@Mn@Open{\lmoustache}{mnomx}{'245} \Decl@Mn@Close{\rmoustache}{mnomx}{'244} \makeatother % Widecheck \makeatletter \DeclareRobustCommand\widecheck[1]{{\mathpalette\@widecheck{#1}}} \def\@widecheck#1#2{% \setbox\z@\hbox{\m@th$#1#2$}% \setbox\tw@\hbox{\m@th$#1% \widehat{% \vrule\@width\z@\@height\ht\z@ \vrule\@height\z@\@width\wd\z@}$}% \dp\tw@-\ht\z@ \@tempdima\ht\z@ \advance\@tempdima2\ht\tw@ \divide\@tempdima\thr@@ \setbox\tw@\hbox{% \raise\@tempdima\hbox{\scalebox{1}[-1]{\lower\@tempdima\box \tw@}}}% {\ooalign{\box\tw@ \cr \box\z@}}} \makeatother % \mathraisebox{voffset}[height][depth]{something} \makeatletter \NewDocumentCommand\mathraisebox{moom}{% \IfNoValueTF{#2}{\def\@temp##1##2{\raisebox{#1}{$\m@th##1##2$}}}{% \IfNoValueTF{#3}{\def\@temp##1##2{\raisebox{#1}[#2]{$\m@th##1##2$}}% }{\def\@temp##1##2{\raisebox{#1}[#2][#3]{$\m@th##1##2$}}}}% \mathpalette\@temp{#4}} \makeatletter % udots (taken from yhmath) \makeatletter \def\udots{\mathinner{\mkern2mu\raise\p@\hbox{.} \mkern2mu\raise4\p@\hbox{.}\mkern1mu \raise7\p@\vbox{\kern7\p@\hbox{.}}\mkern1mu}} \makeatother %% Fix array \newcommand{\itexarray}[1]{\begin{matrix}#1\end{matrix}} %% \itexnum is a noop \newcommand{\itexnum}[1]{#1} %% Renaming existing commands \newcommand{\underoverset}[3]{\underset{#1}{\overset{#2}{#3}}} \newcommand{\widevec}{\overrightarrow} \newcommand{\darr}{\downarrow} \newcommand{\nearr}{\nearrow} \newcommand{\nwarr}{\nwarrow} \newcommand{\searr}{\searrow} \newcommand{\swarr}{\swarrow} \newcommand{\curvearrowbotright}{\curvearrowright} \newcommand{\uparr}{\uparrow} \newcommand{\downuparrow}{\updownarrow} \newcommand{\duparr}{\updownarrow} \newcommand{\updarr}{\updownarrow} \newcommand{\gt}{>} \newcommand{\lt}{<} \newcommand{\map}{\mapsto} \newcommand{\embedsin}{\hookrightarrow} \newcommand{\Alpha}{A} \newcommand{\Beta}{B} \newcommand{\Zeta}{Z} 
\newcommand{\Eta}{H} \newcommand{\Iota}{I} \newcommand{\Kappa}{K} \newcommand{\Mu}{M} \newcommand{\Nu}{N} \newcommand{\Rho}{P} \newcommand{\Tau}{T} \newcommand{\Upsi}{\Upsilon} \newcommand{\omicron}{o} \newcommand{\lang}{\langle} \newcommand{\rang}{\rangle} \newcommand{\Union}{\bigcup} \newcommand{\Intersection}{\bigcap} \newcommand{\Oplus}{\bigoplus} \newcommand{\Otimes}{\bigotimes} \newcommand{\Wedge}{\bigwedge} \newcommand{\Vee}{\bigvee} \newcommand{\coproduct}{\coprod} \newcommand{\product}{\prod} \newcommand{\closure}{\overline} \newcommand{\integral}{\int} \newcommand{\doubleintegral}{\iint} \newcommand{\tripleintegral}{\iiint} \newcommand{\quadrupleintegral}{\iiiint} \newcommand{\conint}{\oint} \newcommand{\contourintegral}{\oint} \newcommand{\infinity}{\infty} \newcommand{\bottom}{\bot} \newcommand{\minusb}{\boxminus} \newcommand{\plusb}{\boxplus} \newcommand{\timesb}{\boxtimes} \newcommand{\intersection}{\cap} \newcommand{\union}{\cup} \newcommand{\Del}{\nabla} \newcommand{\odash}{\circleddash} \newcommand{\negspace}{\!} \newcommand{\widebar}{\overline} \newcommand{\textsize}{\normalsize} \renewcommand{\scriptsize}{\scriptstyle} \newcommand{\scriptscriptsize}{\scriptscriptstyle} \newcommand{\mathfr}{\mathfrak} \newcommand{\statusline}[2]{#2} \newcommand{\tooltip}[2]{#2} \newcommand{\toggle}[2]{#2} % Theorem Environments \theoremstyle{plain} \newtheorem{theorem}{Theorem} \newtheorem{lemma}{Lemma} \newtheorem{prop}{Proposition} \newtheorem{cor}{Corollary} \newtheorem*{utheorem}{Theorem} \newtheorem*{ulemma}{Lemma} \newtheorem*{uprop}{Proposition} \newtheorem*{ucor}{Corollary} \theoremstyle{definition} \newtheorem{defn}{Definition} \newtheorem{example}{Example} \newtheorem*{udefn}{Definition} \newtheorem*{uexample}{Example} \theoremstyle{remark} \newtheorem{remark}{Remark} \newtheorem{note}{Note} \newtheorem*{uremark}{Remark} \newtheorem*{unote}{Note} %------------------------------------------------------------------- \begin{document} %------------------------------------------------------------------- \section*{entropy} \hypertarget{context}{}\subsubsection*{{Context}}\label{context} \hypertarget{measure_and_probability_theory}{}\paragraph*{{Measure and probability theory}}\label{measure_and_probability_theory} [[!include measure theory - contents]] \hypertarget{entropy}{}\section*{{Entropy}}\label{entropy} \noindent\hyperlink{idea}{Idea}\dotfill \pageref*{idea} \linebreak \noindent\hyperlink{mathematical_definitions}{Mathematical definitions}\dotfill \pageref*{mathematical_definitions} \linebreak \noindent\hyperlink{preliminary_definitions}{Preliminary definitions}\dotfill \pageref*{preliminary_definitions} \linebreak \noindent\hyperlink{surprisal}{Surprisal}\dotfill \pageref*{surprisal} \linebreak \noindent\hyperlink{almost_partitions}{Almost partitions}\dotfill \pageref*{almost_partitions} \linebreak \noindent\hyperlink{entropy_of_a_algebra_on_a_probability_space}{Entropy of a $\sigma$-algebra on a probability space}\dotfill \pageref*{entropy_of_a_algebra_on_a_probability_space} \linebreak \noindent\hyperlink{entropy_of_a_probability_space}{Entropy of a probability space}\dotfill \pageref*{entropy_of_a_probability_space} \linebreak \noindent\hyperlink{entropy_of_a_partition_of_a_probability_space}{Entropy of a partition of a probability space}\dotfill \pageref*{entropy_of_a_partition_of_a_probability_space} \linebreak \noindent\hyperlink{entropy_of_a_partition_of_a_discrete_probability_space}{Entropy of (a partition of) a discrete probability space}\dotfill 
\pageref*{entropy_of_a_partition_of_a_discrete_probability_space} \linebreak
\noindent\hyperlink{entropy_with_respect_to_an_absolutely_continuous_probability_measure_on_the_real_line}{Entropy with respect to an absolutely continuous probability measure on the real line}\dotfill \pageref*{entropy_with_respect_to_an_absolutely_continuous_probability_measure_on_the_real_line} \linebreak
\noindent\hyperlink{entropy_of_a_density_matrix}{Entropy of a density matrix}\dotfill \pageref*{entropy_of_a_density_matrix} \linebreak
\noindent\hyperlink{RelativeEntropy}{Relative entropy}\dotfill \pageref*{RelativeEntropy} \linebreak
\noindent\hyperlink{physical}{Physical entropy}\dotfill \pageref*{physical} \linebreak
\noindent\hyperlink{gravitational_entropy}{Gravitational entropy}\dotfill \pageref*{gravitational_entropy} \linebreak
\noindent\hyperlink{related_concepts}{Related concepts}\dotfill \pageref*{related_concepts} \linebreak
\noindent\hyperlink{References}{References}\dotfill \pageref*{References} \linebreak
\noindent\hyperlink{ReferencesGeneral}{General}\dotfill \pageref*{ReferencesGeneral} \linebreak
\noindent\hyperlink{categorical_and_cohomological_interpretations}{Categorical and cohomological interpretations}\dotfill \pageref*{categorical_and_cohomological_interpretations} \linebreak
\noindent\hyperlink{ReferencesAxiomaticCharacterization}{Axiomatic characterizations}\dotfill \pageref*{ReferencesAxiomaticCharacterization} \linebreak
\hypertarget{idea}{}\subsection*{{Idea}}\label{idea}
Entropy is a measure of disorder, given by the amount of [[information]] necessary to precisely specify the [[state]] of a system. Entropy is important in [[information theory]] and [[statistical physics]].
\hypertarget{mathematical_definitions}{}\subsection*{{Mathematical definitions}}\label{mathematical_definitions}
We can give a precise [[mathematics|mathematical]] definition of entropy in [[probability theory]].
\hypertarget{preliminary_definitions}{}\subsubsection*{{Preliminary definitions}}\label{preliminary_definitions}
We will want a couple of preliminary definitions. Fix a [[probability space]] $(X,\mu)$; that is, $X$ is a [[set]], and $\mu$ is a [[probability measure]] on $X$.
\hypertarget{surprisal}{}\paragraph*{{Surprisal}}\label{surprisal}
If $A$ is a [[measurable subset]] of $X$, then the \textbf{surprisal} or \textbf{self-[[information]]} of $A$ (with respect to $\mu$) is
\begin{displaymath} \sigma_\mu(A) \coloneqq -\log \mu(A) . \end{displaymath}
Notice that, despite the minus sign in this formula, $\sigma$ is a nonnegative function (since $\log p \leq 0$ for $p \leq 1$); more precisely, $\sigma$ takes values in $[0,\infty]$. The term `surprisal' is intended to suggest how surprised one ought to be upon learning that the event modelled by $A$ is true: from no surprise for an event with probability $1$ to infinite surprise for an event with probability $0$.
The \textbf{expected surprisal} of $A$ is then
\begin{displaymath} h_\mu(A) \coloneqq \mu(A) \sigma_\mu(A) = -\mu(A) \log \mu(A) = -\log(\mu(A)^{\mu(A)}) \end{displaymath}
(with $h_\mu(A) = 0$ when $\mu(A) = 0$). Like $\sigma$, $h$ is a nonnegative function; it is also important that $h_\mu$ is [[convex function|concave]]. Both $h_\mu(\emptyset)$ and $h_\mu(X)$ are $0$, but for different reasons; $h_\mu(A) = 0$ when $\mu(A) = 1$ because, upon observing an event with probability $1$, one gains no information; while $h_\mu(A) = 0$ when $\mu(A) = 0$ because one expects never to observe an event with probability $0$.
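For instance, with logarithms taken to base $2$ (so that surprisal is measured in bits), an event $A$ with $\mu(A) = 1/8$ has
\begin{displaymath} \sigma_\mu(A) = -\log_2 \frac{1}{8} = 3 , \qquad h_\mu(A) = \frac{1}{8} \cdot 3 = \frac{3}{8} , \end{displaymath}
while an event with $\mu(A) = 1/2$ is less surprising ($\sigma_\mu(A) = 1$) but has the larger expected surprisal $h_\mu(A) = 1/2$.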
The maximum possible value of $h$ is $\mathrm{e}^{-1} \log \mathrm{e}$ (so $\mathrm{e}^{-1}$ if we use [[natural logarithms]]), which occurs when $\mu(A) = \mathrm{e}^{-1}$.
We have not specified the base of the [[logarithm]], which amounts to a constant factor (proportional to the logarithm of the base), which we think of as specifying the [[unit of measurement]] of entropy. Common choices for the base are $2$ (whose unit is the [[bit]], originally a unit of memory in computer science), $256$ (byte: $8$ bits), $3$ (trit), $\mathrm{e}$ (nat or neper), $10$ (bel, originally a unit of relative power intensity in telegraphy, or ban, dit, or hartley), and $\root{10}{10}$ (decibel: $1/10$ of a bel). In applications to [[statistical physics]], common bases are approximately $10^{3.1456 \times 10^{22}}$ (joule per kelvin), $1.65404$ (calorie per mole-kelvin), etc.
\hypertarget{almost_partitions}{}\paragraph*{{Almost partitions}}\label{almost_partitions}
Recall that a \textbf{[[partition]]} of a set $X$ is a [[family of subsets|family]] $\mathcal{P}$ of subsets of $X$ (the \emph{parts} of the partition) such that $X$ is the [[union]] of the parts and any two distinct parts are [[disjoint sets|disjoint]] (or better, for [[constructive mathematics]], two parts are equal if their intersection is [[inhabited subset|inhabited]]). When $X$ is a probability space, we may relax both conditions: for the union of $\mathcal{P}$, we require only that it be a [[full set]]; for the intersections of pairs of elements of $\mathcal{P}$, we require only that they be [[null sets]] (or better, for constructive mathematics, that $A = B$ when $\mu^*(A \cap B) \gt 0$, where $\mu^*$ is the [[outer measure]] corresponding to $\mu$). For definiteness, call such a collection of subsets a \textbf{$\mu$-almost partition}; a $\mu$-almost partition is \emph{measurable} if each of its parts is measurable (in which case we can use $\mu$ instead of $\mu^*$).
\hypertarget{entropy_of_a_algebra_on_a_probability_space}{}\subsubsection*{{Entropy of a $\sigma$-algebra on a probability space}}\label{entropy_of_a_algebra_on_a_probability_space}
This is a general mathematical definition of entropy.
Given a probability [[measure space]] $(X,\mu)$ and a $\sigma$-[[sigma-algebra|algebra]] $\mathcal{M}$ of [[measurable sets]] in $X$, the \textbf{entropy} of $\mathcal{M}$ with respect to $\mu$ is
\begin{equation} H_\mu(\mathcal{M}) \coloneqq \sup \{ \sum_{A \in \mathcal{F}} h_\mu(A) \;|\; \mathcal{F} \subseteq \mathcal{M},\; {|\mathcal{F}|} \lt \aleph_0,\; X = \biguplus \mathcal{F} \} . \label{general}\end{equation}
In words, the entropy is the [[supremum]], over all ways of expressing $X$ as an internal [[disjoint union]] of [[finite set|finitely many]] elements of the $\sigma$-algebra $\mathcal{M}$, of the sum, over these measurable sets, of the expected surprisals of these sets. This supremum can also be expressed as a [[convergence|limit]] as we take $\mathcal{F}$ to be finer and finer, since $h_\mu$ is concave and the partitions are [[directed set|directed]].
We have written this so that $\mathcal{F}$ is a finite partition of $X$; without loss of generality, we may require only that $\mathcal{F}$ be a $\mu$-almost partition. In [[constructive mathematics]], it seems that we \emph{must} use this weakened condition, at least the part that allows $\bigcup \mathcal{F}$ to merely be full.
This definition is very general, and it is instructive to look at special cases.
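To make formula \eqref{general} concrete, here is a small brute-force sketch in Python; the four-point space, its point masses, and the helper names are all made up for illustration, and the $\sigma$-algebra is taken to be the full power set, so that the admissible families $\mathcal{F}$ are exactly the partitions of $X$ into nonempty subsets. The sketch enumerates these partitions and takes the largest value of $\sum_{A \in \mathcal{F}} h_\mu(A)$.
\begin{verbatim}
from math import log

# A toy finite probability space (hypothetical data); the sigma-algebra is the
# full power set, so every partition of X into nonempty subsets is admissible.
mu_point = {0: 0.5, 1: 0.25, 2: 0.125, 3: 0.125}
X = list(mu_point)

def h(part, base=2):
    # expected surprisal h_mu(A) = -mu(A) log mu(A), with h = 0 when mu(A) = 0
    p = sum(mu_point[i] for i in part)
    return 0.0 if p == 0 else -p * log(p, base)

def partitions(elements):
    # yield every partition of the finite list `elements` into nonempty parts
    if not elements:
        yield []
        return
    first, rest = elements[0], elements[1:]
    for smaller in partitions(rest):
        yield [[first]] + smaller                    # `first` as its own part
        for i, part in enumerate(smaller):           # or join an existing part
            yield smaller[:i] + [part + [first]] + smaller[i + 1:]

# Entropy of the power-set sigma-algebra: the sup over finite partitions
H = max(sum(h(part) for part in P) for P in partitions(X))
print(H)   # 1.75 bits, attained by the partition into singletons
\end{verbatim}
The supremum here is attained by the partition into singletons, in line with the remark above that refining a partition can only increase the sum (by concavity of $h_\mu$).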
\hypertarget{entropy_of_a_probability_space}{}\subsubsection*{{Entropy of a probability space}}\label{entropy_of_a_probability_space} Given a probability space $(X,\mu)$, the \textbf{entropy} of this probability space is the entropy, with respect to $\mu$, of the $\sigma$-algebra of \emph{all} measurable subsets of $X$. \hypertarget{entropy_of_a_partition_of_a_probability_space}{}\subsubsection*{{Entropy of a partition of a probability space}}\label{entropy_of_a_partition_of_a_probability_space} Every measurable almost-partition of a measure space (indeed, any family of measurable subsets) generates a $\sigma$-algebra. The \textbf{entropy} of a measurable almost-partition $\mathcal{P}$ of a probability measure space $(X,\mu)$ is the entropy, with respect to $\mu$, of the $\sigma$-algebra generated by $\mathcal{P}$. The formula \eqref{general} may then be written \begin{equation} H_\mu(\mathcal{P}) = \sum_{A \in \mathcal{P}} h_\mu(A) = -\sum_{A \in \mathcal{P}} \log(\mu(A)^{\mu(A)}) , \label{partition}\end{equation} since an infinite sum (of nonnegative terms) may also be defined as a supremum. (Actually, the supremum in the infinite sum does not quite match the supremum in \eqref{general}, so there is a bit of a theorem to prove here.) In most of the following special cases, we will consider only partitions, although it would be possible also to consider more general $\sigma$-algebras. \hypertarget{entropy_of_a_partition_of_a_discrete_probability_space}{}\subsubsection*{{Entropy of (a partition of) a discrete probability space}}\label{entropy_of_a_partition_of_a_discrete_probability_space} Recall that a \textbf{discrete probability space} is a [[set]] $X$ equipped with a function $\mu\colon X \to ]0,1]$ such that $\sum_{i \in X} \mu(i) = 1$; since $\mu(i) \gt 0$ is possible for only countably many $i$, $X$ must be [[countable set|countable]]. We make $X$ into a measure space (with every subset measurable) by defining $\mu(A) \coloneqq \sum_{i \in A} \mu(i)$. Since every inhabited set has positive measure, every almost-partition of $X$ is a partition; since every set is measurable, any partition is measurable. Given a discrete probability space $(X,\mu)$ and a partition $\mathcal{P}$ of $X$, the \textbf{entropy} of $\mathcal{P}$ with respect to $\mu$ is defined to be the entropy of $\mathcal{P}$ with respect to the probability measure induced by $\mu$. Simplifying \eqref{partition}, we find \begin{displaymath} H_\mu(\mathcal{P}) = -\sum_{A \in \mathcal{P}} \log((\sum_{i \in A} \mu(i))^{\sum_{i \in A} \mu(i)}) . \end{displaymath} More specially, the \textbf{entropy} of the discrete probability space $(X,\mu)$ is the entropy of the partition of $X$ into [[singletons]]; we find \begin{displaymath} H_\mu(X) = \sum_{i \in X} h_\mu(i) = -\sum_{i \in X} \log(\mu(i)^{\mu(i)}) . \end{displaymath} This is actually a special case of the entropy of a probability space, since the $\sigma$-algebra generated by the singletons is the power set of $X$. Yet more specially, the \textbf{entropy} of a [[finite set]] $X$ is the entropy of $X$ equipped with the uniform discrete probability measure; we find \begin{equation} H_{unif}(X) = -\sum_{i \in X} \log((\frac{1}{|X|})^{\frac{1}{|X|}}) = \log {|X|} , \label{Boltzmann}\end{equation} which is probably the best known mathematical formula for entropy, due to [[Max Planck]], who attributed it to [[Ludwig Boltzmann]]. (Its \hyperlink{physical}{physical interpretation} appears below.) 
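As a quick numerical sketch of formula \eqref{partition} in the discrete case (the point masses and the coarse partition below are made up for illustration), the entropy of a partition is computed from the total mass of each part, and the uniform measure on a finite set recovers Boltzmann's formula $\log {|X|}$:
\begin{verbatim}
from math import log

def H_partition(mu, partition, base=2):
    # H_mu(P) = sum over parts A of -mu(A) log mu(A), where mu(A) is the
    # total point mass of the part A
    total = 0.0
    for part in partition:
        p = sum(mu[i] for i in part)
        if p > 0:
            total += -p * log(p, base)
    return total

# A hypothetical discrete probability space and a coarse partition of it
mu = {'a': 0.5, 'b': 0.2, 'c': 0.2, 'd': 0.1}
coarse = [{'a'}, {'b', 'c'}, {'d'}]
singletons = [{i} for i in mu]

print(H_partition(mu, coarse))       # entropy of the coarse partition
print(H_partition(mu, singletons))   # entropy of the discrete space (X, mu)

# Uniform measure on a finite set: Boltzmann's formula H = log |X|
unif = {i: 1 / 4 for i in 'wxyz'}
print(H_partition(unif, [{i} for i in unif]), log(4, 2))   # both equal 2.0
\end{verbatim}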
Of all probability measures on $X$, the uniform measure has the [[maximum entropy]].
\hypertarget{entropy_with_respect_to_an_absolutely_continuous_probability_measure_on_the_real_line}{}\subsubsection*{{Entropy with respect to an absolutely continuous probability measure on the real line}}\label{entropy_with_respect_to_an_absolutely_continuous_probability_measure_on_the_real_line}
Recall that a [[Borel measure]] $\mu$ on an [[interval]] $X$ in the [[real line]] is \textbf{[[absolutely continuous measure|absolutely continuous]]} if $\mu(A) = 0$ whenever $A$ is a [[null set]] (with respect to [[Lebesgue measure]]), or better such that the Lebesgue measure of $A$ is positive whenever $\mu(A) \gt 0$. In this case, we can take the [[Radon–Nikodym derivative]] of $\mu$ with respect to Lebesgue measure, to get an [[integrable function]] $f$, called the \textbf{probability distribution function}; we recover $\mu$ by
\begin{equation} \mu(A) = \int_A f(x) \mathrm{d}x , \label{pdf}\end{equation}
where the integral is taken with respect to Lebesgue measure.
If $\mathcal{P}$ is a partition (or a Lebesgue-almost-partition) of an interval $X$ into [[Borel sets]], then the \textbf{entropy} of $\mathcal{P}$ with respect to an integrable function $f$ is the entropy of $\mathcal{P}$ with respect to the measure induced by $f$ using the integral formula \eqref{pdf}; we find
\begin{displaymath} H_f(\mathcal{P}) = -\sum_{A \in \mathcal{P}} \log((\int_A f(x) \mathrm{d}x)^{\int_A f(x) \mathrm{d}x}) . \end{displaymath}
On the other hand, the \textbf{entropy} of the probability distribution space $(X,f)$ is the entropy of the entire $\sigma$-algebra of all Borel sets (which is \emph{not} generated by a partition) with respect to $f$; we find
\begin{displaymath} H_f(X) = -\int_{x \in X} \log(f(x)^{f(x)}) \mathrm{d}x \end{displaymath}
by a fairly complicated argument. I haven't actually managed to check this argument yet, although my memory tags it as a true fact. ---Toby
\hypertarget{entropy_of_a_density_matrix}{}\subsubsection*{{Entropy of a density matrix}}\label{entropy_of_a_density_matrix}
In the analogy between [[classical physics]] and [[quantum physics]], we move from probability distributions on a [[phase space]] to [[density operators]] on a [[Hilbert space]]. Just as the entropy of a probability distribution $f$ is given by $-\int f \log f$, so the \textbf{entropy} of a density operator $\rho$ is
\begin{displaymath} H_\rho \coloneqq -\operatorname{Tr}(\rho \log \rho) , \end{displaymath}
using the [[functional calculus]].
These are both special cases of the entropy of a [[state]] on a $C^*$-[[C-star-algebra|algebra]]. There is a way to fit this into the framework given by \eqref{general}, but I don't remember it (and never really understood it).
\hypertarget{RelativeEntropy}{}\subsubsection*{{Relative entropy}}\label{RelativeEntropy}
For two finite probability distributions $p$ and $q$, their \textbf{relative entropy} is
\begin{displaymath} S(p/q) \coloneqq \sum_{k = 1}^n p_k (\log p_k - \log q_k) \,. \end{displaymath}
Or alternatively, for $\rho, \phi$ two [[density matrix|density matrices]], their relative entropy is
\begin{displaymath} S(\rho/\phi) \coloneqq \operatorname{tr} \rho (\log \rho - \log \phi) \,. \end{displaymath}
There is a generalization of these definitions to [[state]]s on general [[von Neumann algebra]]s, due to (\hyperlink{Araki}{Araki}). For more on this see \emph{[[relative entropy]]}.
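The last few formulas are easy to check numerically in small cases. The sketch below (using NumPy, with made-up data and natural logarithms throughout) computes the relative entropy of two finite distributions and the entropy $-\operatorname{Tr}(\rho \log \rho)$ of a density matrix via its eigenvalues, which is how the functional calculus acts on a Hermitian matrix.
\begin{verbatim}
import numpy as np

def shannon_relative_entropy(p, q):
    # S(p/q) = sum_k p_k (log p_k - log q_k), for finite distributions
    p, q = np.asarray(p, float), np.asarray(q, float)
    mask = p > 0                     # terms with p_k = 0 contribute 0
    return float(np.sum(p[mask] * (np.log(p[mask]) - np.log(q[mask]))))

def von_neumann_entropy(rho):
    # H_rho = -Tr(rho log rho), computed from the eigenvalues of rho
    evals = np.linalg.eigvalsh(rho)
    evals = evals[evals > 1e-12]     # 0 log 0 = 0 by convention
    return float(-np.sum(evals * np.log(evals)))

# Hypothetical examples:
p, q = [0.5, 0.5], [0.9, 0.1]
print(shannon_relative_entropy(p, q))

rho = np.diag([0.5, 0.5])            # maximally mixed qubit
print(von_neumann_entropy(rho), np.log(2))   # both equal log 2
\end{verbatim}
Diagonalizing first is just a convenient way to evaluate $\rho \log \rho$; any other realization of the functional calculus would give the same trace.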
\hypertarget{physical}{}\subsection*{{Physical entropy}}\label{physical}
As hinted above, any probability distribution on a [[phase space]] in [[classical physics]] has an entropy, and any [[density matrix]] on a [[Hilbert space]] in [[quantum physics]] has an entropy. However, these are examples of \textbf{microscopic entropy}, which is not the usual entropy in [[thermodynamics]] and most other branches of [[physics]]. (In particular, microscopic entropy is conserved, rather than increasing with time.) Instead, physicists use \emph{coarse-grained} entropy, which corresponds mathematically to taking the entropy of a $\sigma$-algebra much smaller than the $\sigma$-algebra of all measurable sets.
Given a classical system with $N$ microscopic degrees of freedom, we identify $n$ macroscopic degrees of freedom that we can reasonably expect to measure, giving a map from $\mathbb{R}^N$ to $\mathbb{R}^n$ (or more generally, a map from an $N$-dimensional microscopic phase space to an $n$-dimensional macroscopic phase space). Then the $\sigma$-algebra of all measurable sets in $\mathbb{R}^n$ [[pullback|pulls back]] to a $\sigma$-algebra on $\mathbb{R}^N$, and the \textbf{macroscopic entropy} of a statistical state is the [[conditional entropy]] of this $\sigma$-algebra. (Typically, $N$ is on the order of [[Avogadro's number]], while $n$ is rarely more than half a dozen, and often as small as $2$.)
If we specify a state by a point in $\mathbb{R}^n$, a macroscopic pure state, and assume a uniform probability distribution on its [[fibre]] in $\mathbb{R}^N$, then this results in the [[maximum entropy]]. If this fibre were a finite set, then we would recover Boltzmann's formula \eqref{Boltzmann}. This is never exactly true in classical statistical physics, but it is often nevertheless a very good approximation. (Boltzmann's formula actually makes better physical sense in quantum statistical physics, even though Boltzmann himself did not live to see this.)
A more sophisticated approach (pioneered by [[Josiah Gibbs]]) is to consider all possible mixed microstates (that is, all possible probability distributions on the space $\mathbb{R}^N$ of pure microstates) whose [[expectation values]] of total energy and other [[extensive quantities]] (among those that are functions of the macrostate) match the given pure macrostate (point in $\mathbb{R}^n$). We pick the mixed microstate with the [[maximum entropy]]. If this is a [[thermal state]], then we say that the macrostate has a [[temperature]], but it has an entropy in any case.
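The coarse-graining idea can be caricatured with a toy discrete model; the real setting above is a continuous phase space, so everything below (the microstates, their probabilities, and the macroscopic observable) is a hypothetical stand-in. Pulling the partition by macro-value back to the microstate space and computing the entropy of that partition gives a coarse-grained entropy, which is never larger than the microscopic one.
\begin{verbatim}
from math import log
from collections import defaultdict

# Toy discrete "microstate" space with assumed probabilities, and an assumed
# macroscopic observable playing the role of the map R^N -> R^n.
micro_prob = {(0, 0): 0.1, (0, 1): 0.2, (1, 0): 0.3, (1, 1): 0.4}
macro = lambda state: state[0] + state[1]     # hypothetical macroscopic variable

def entropy(probs):
    return -sum(p * log(p) for p in probs if p > 0)

# Microscopic entropy: entropy of the partition into single microstates
H_micro = entropy(micro_prob.values())

# Coarse-grained entropy: pull the partition by macro-value back to the
# microstate space and add up the probability carried by each part
macro_prob = defaultdict(float)
for state, p in micro_prob.items():
    macro_prob[macro(state)] += p
H_macro = entropy(macro_prob.values())

print(H_micro, H_macro)   # coarse-graining loses information: H_macro <= H_micro
\end{verbatim}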
\hypertarget{gravitational_entropy}{}\subsubsection*{{Gravitational entropy}}\label{gravitational_entropy}
\begin{itemize}%
\item gravitational entropy
\begin{itemize}%
\item [[Bekenstein-Hawking entropy]]
\item [[generalized second law of thermodynamics]]
\end{itemize}
\end{itemize}
\hypertarget{related_concepts}{}\subsection*{{Related concepts}}\label{related_concepts}
\begin{itemize}%
\item [[entropic force]]
\item [[dissipative system]]
\item [[entanglement entropy]], [[holographic entanglement entropy]]
\end{itemize}
\hypertarget{References}{}\subsection*{{References}}\label{References}
\hypertarget{ReferencesGeneral}{}\subsubsection*{{General}}\label{ReferencesGeneral}
The concept of entropy was introduced by [[Rudolf Clausius]] in 1865 in the context of [[physics]], and then adapted to [[information theory]] by [[Claude Shannon]] in 1948, to [[quantum mechanics]] by [[John von Neumann]] in 1955, to [[ergodic theory]] by [[Andrey Kolmogorov]] and Sinai in 1958, and to [[topological dynamics]] by Adler, Konheim and McAndrew in 1965.
\hyperlink{RelativeEntropy}{Relative entropy} of [[states]] on [[von Neumann algebras]] was introduced in
\begin{itemize}%
\item \hypertarget{Araki}{}[[Huzihiro Araki]], \emph{Relative Entropy of States of von Neumann Algebras} (\href{http://www.google.de/url?sa=t&source=web&cd=5&ved=0CEsQFjAE&url=http%3A%2F%2Fwww.ems-ph.org%2Fjournals%2Fshow_pdf.php%3Fissn%3D0034-5318%26vol%3D11%26iss%3D3%26rank%3D9&rct=j&q=entropy%20cocycle%20von%20Neumann%20algebra&ei=n3jrTYyxOI-c-waJvMnPDw&usg=AFQjCNGuJgVUE7CtGPmb2PZLhOOWt1_JPQ&cad=rja}{pdf})
\end{itemize}
A survey with an eye towards [[black hole entropy]] is in
\begin{itemize}%
\item [[Ted Jacobson]], \emph{Entropy from Carnot to Bekenstein} (\href{https://arxiv.org/abs/1810.07839}{arXiv:1810.07839})
\end{itemize}
A survey of entropy in [[operator algebras]] is in
\begin{itemize}%
\item Erling St\o{}rmer, \emph{Entropy in operator algebras} (\href{http://www.researchgate.net/profile/Erling_Stormer/publication/228832297_Entropy_in_operator_algebras/links/02bfe50ed5ccabaf69000000.pdf?origin=publication_detail}{pdf})
\end{itemize}
See also
\begin{itemize}%
\item A. P. Balachandran, T. R. Govindarajan, Amilcar R. de Queiroz, A. F. Reyes-Lega, \emph{Algebraic approach to entanglement and entropy} (\href{http://arxiv.org/abs/1301.1300}{arXiv:1301.1300})
\end{itemize}
A large collection of references on quantum entropy is in
\begin{itemize}%
\item Christopher Fuchs, \emph{References for research in quantum distinguishability and state disturbance} (\href{http://www.perimeterinstitute.ca/personal/cfuchs/BigRef.pdf}{pdf})
\end{itemize}
\hypertarget{categorical_and_cohomological_interpretations}{}\subsubsection*{{Categorical and cohomological interpretations}}\label{categorical_and_cohomological_interpretations}
A discussion of entropy with an eye towards the [[presheaf topos]] over the [[site]] of finite [[measure spaces]] is in
\begin{itemize}%
\item [[Mikhail Gromov]], \emph{In a search for a structure, Part I: On entropy} (2012) (\href{https://www.ihes.fr/~gromov/wp-content/uploads/2018/08/structre-serch-entropy-july5-2012.pdf}{pdf})
\item [[William Lawvere]], \emph{State categories, closed categories, and the existence of semi-continuous entropy functions}, IMA Preprint 86, \href{https://www.ima.umn.edu/State-Categories-Closed-Categories-and-Existence-Semi-Continuous-Entropy-Functions}{pdf}
\end{itemize}
A (co)homological viewpoint is discussed in
\begin{itemize}%
\item P. Baudot, D.
Bennequin, \emph{The homological nature of entropy}, Entropy 17(5):3253--3318, 2015, \href{https://doi.org/10.3390/e17053253}{doi} (open access)
\end{itemize}
(for an update, see also the abstract of a talk by Baudot \href{https://calendar.math.cas.cz/content/information-cohomology-and-topological-information-data-analysis}{here})
\hypertarget{ReferencesAxiomaticCharacterization}{}\subsubsection*{{Axiomatic characterizations}}\label{ReferencesAxiomaticCharacterization}
After the concept of entropy proved enormously useful in practice, many people have tried to find a more abstract foundation for the concept (and its variants) by characterizing it as the unique measure satisfying some list of plausible-sounding axioms.
A characterization of \hyperlink{RelativeEntropy}{relative entropy} on finite-[[dimension|dimensional]] [[C-star algebras]] is given in
\begin{itemize}%
\item D. Petz, \emph{Characterization of the relative entropy of states of matrix algebras}, Acta Math. Hung. 59 (3-4) (1992) (\href{http://www.renyi.hu/~petz/pdf/52.pdf}{pdf})
\end{itemize}
A simple characterization of von Neumann entropy of [[density matrices]] (mixed [[quantum states]]) is discussed in
\begin{itemize}%
\item Bernhard Baumgartner, \emph{Characterizing Entropy in Statistical Physics and in Quantum Information Theory}, \href{http://arxiv.org/abs/1206.5727}{arXiv:1206.5727}
\end{itemize}
Entropy-like quantities appear in the study of many PDEs, with entropy estimates. For an introduction, see
\begin{itemize}%
\item L. C. Evans, \emph{A survey of entropy methods for partial differential equations}, \href{http://math.berkeley.edu/~evans/ams.entropy.pdf}{pdf}; (and a longer course text:) \emph{Entropy and partial differential equations}, \href{http://math.berkeley.edu/~evans/entropy.and.PDE.pdf}{pdf}
\end{itemize}
[[!redirects entropy]] [[!redirects entropies]]
\end{document}