% Document class and page layout.
%
% \documentclass replaces the LaTeX 2.09 command \documentstyle, which
% forces LaTeX into its 2.09 compatibility mode.  amsfonts (formerly
% passed as a \documentstyle option) is loaded as a package; it provides
% \mathbb, which the \F macro defined later in this preamble needs.
% For a 10pt variant, change 11pt to 10pt in the class options.
\documentclass[twoside,11pt]{article}
\usepackage{amsfonts}
% Horizontal layout: 6.5in text block, shifted by +/-0.25in on odd/even pages.
\setlength{\oddsidemargin}{0.25in}
\setlength{\evensidemargin}{-0.25in}
\setlength{\textwidth}{6.5in}
% Vertical layout.
\setlength{\topmargin}{-0.6in}
\setlength{\headsep}{0.75in}
\setlength{\textheight}{8.5in}
% Paragraphs are separated by vertical space instead of being indented.
\setlength{\parindent}{0in}
\setlength{\parskip}{0.1in}
%
% The following commands set up the lecnum (lecture number) counter
% and make the other numbering schemes print relative to it: with
% lecnum = 4, page 1 prints as "4-1" and equation 1 as "4.1".
%
\newcounter{lecnum}
% Page numbers: <lecnum>-<page>
\renewcommand{\thepage}{\thelecnum-\arabic{page}}
% Everything else: <lecnum>.<n>
\renewcommand{\thesection}{\thelecnum.\arabic{section}}
\renewcommand{\theequation}{\thelecnum.\arabic{equation}}
\renewcommand{\thetable}{\thelecnum.\arabic{table}}
\renewcommand{\thefigure}{\thelecnum.\arabic{figure}}
%
% \problemset{NUMBER}{OUT-DATE}{DUE-DATE}
%
% Generates the problem-set header: starts a new page, sets the lecnum
% counter to NUMBER, restarts page numbering at 1, and typesets a framed
% banner containing the course name and term, "Problem Set NUMBER", and
% the out/due dates.
%
% Line ends inside the definition are protected with % so that no stray
% space tokens end up inside the \framebox or the \hbox'es (they would
% widen the frame and push the title off-centre).
%
\newcommand{\problemset}[3]{%
  \pagestyle{myheadings}%
  \newpage
  % \thispagestyle acts on the page currently being built, so it has to
  % come after \newpage to affect the header page itself.
  \thispagestyle{plain}%
  \setcounter{lecnum}{#1}%
  \setcounter{page}{1}%
  \noindent
  \begin{center}%
    \framebox{%
      \vbox{\vspace{2mm}%
        \hbox to 6.28in{\bfseries CS~151~~~Complexity Theory\hfill Spring 2021}%
        \vspace{4mm}%
        \hbox to 6.28in{\Large\hfill Problem Set #1\hfill}%
        \vspace{2mm}%
        \hbox to 6.28in{\itshape Out: #2\hfill Due: #3}%
        \vspace{2mm}}%
    }%
  \end{center}%
  \vspace*{4mm}%
}
%
% Citation convention: the authors' initials followed by the year.
% For example, a paper by Leighton and Maggs is cited as \cite{LM89},
% and a paper by Strassen as \cite{S69}.
% To avoid bibliography problems, \cite is redefined here to simply print
% its argument in square brackets, so \cite{LM89} typesets as [LM89].
%
\renewcommand{\cite}[1]{[#1]}
% Load the epsf macros; \fig below relies on \epsfbox and \epsfxsize
% (presumably both defined there -- confirm against the epsf documentation).
\input{epsf}
% \fig{NUMBER}{FIGURE-SIZE}{CAPTION}{FILENAME}
%
% Puts a figure exactly where the command is used (it is not a float).
%   NUMBER       figure number; printed as "Figure <lecnum>.NUMBER"
%   FIGURE-SIZE  length assigned to \epsfxsize before the file is included
%                (presumably the displayed width -- TODO confirm)
%   CAPTION      caption text, printed after the figure number
%   FILENAME     file handed to \epsfbox
% The figure number is typed by hand; the figure counter is not used.
% NOTE(review): the line ends inside the definition are not protected
% with %, so the body contains space tokens; they are ignored only if
% \fig is used between paragraphs (vertical mode).
\newcommand{\fig}[4]{
\vspace{0.2 in}
\setlength{\epsfxsize}{#2}
\centerline{\epsfbox{#4}}
\begin{center}
Figure \thelecnum.#1:~#3
\end{center}
}
% Theorem-like environments.  `theorem' owns the counter, which is
% numbered within lecnum; every other environment shares that counter.
\newtheorem{theorem}{Theorem}[lecnum]
\newtheorem{claim}[theorem]{Claim}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
% proof: opens with a bold "Proof:" and closes with a 2mm black square
% pushed to the right by \hfill.
\newenvironment{proof}%
  {{\bf Proof:}}%
  {\hfill\rule{2mm}{2mm}}
% Some useful equation alignment commands, borrowed from TeX.
% They use internal names containing @ (\m@th, \displ@y, \@centering,
% \z@skip), hence the \makeatletter ... \makeatother bracket.
\makeatletter
% \eqalign{...}: a vertically centred (\vcenter) two-column alignment.
% Column 1 is set flush right and column 2 flush left, both in
% \displaystyle; the rows are opened up by \jot.  The argument is the
% body of an \ialign (columns separated by &, rows ended as in \halign).
\def\eqalign#1{\,\vcenter{\openup\jot\m@th
\ialign{\strut\hfil$\displaystyle{##}$&$\displaystyle{{}##}$\hfil
\crcr#1\crcr}}\,}
% \eqalignno{...}: a three-column \halign spanning \displaywidth.
% Columns 1 and 2 are as in \eqalign; column 3 is set with \llap, so it
% sticks out to the left from the right-hand end of the display
% (presumably meant for an equation number at the right margin).
\def\eqalignno#1{\displ@y \tabskip\@centering
\halign to\displaywidth{\hfil$\displaystyle{##}$\tabskip\z@skip
&$\displaystyle{{}##}$\hfil\tabskip\@centering
&\llap{$##$}\tabskip\z@skip\crcr
#1\crcr}}
% \leqalignno{...}: like \eqalignno, but column 3 is moved back by
% \displaywidth and set with \rlap, so it starts at the left-hand end of
% the display (presumably meant for an equation number at the left margin).
\def\leqalignno#1{\displ@y \tabskip\@centering
\halign to\displaywidth{\hfil$\displaystyle{##}$\tabskip\z@skip
&$\displaystyle{{}##}$\hfil\tabskip\@centering
&\kern-\displaywidth\rlap{$##$}\tabskip\displaywidth\crcr
#1\crcr}}
\makeatother
% **** IF YOU WANT TO DEFINE ADDITIONAL MACROS FOR YOURSELF, PUT THEM HERE:
% \F: blackboard-bold F, used for the finite fields \F_2 and \F_q.
% Defined with \newcommand* rather than \def so that LaTeX reports an
% error if \F already exists instead of silently overwriting it.
\newcommand*{\F}{{\mathbb{F}}}
\begin{document}
\problemset{4}{April 22}{{\bf April 29}}
Reminder: you are encouraged to work in groups of two or three;
however you must turn in your own write-up and note with whom you
worked. You may consult the course notes and the optional text (Papadimitriou). The
full honor code guidelines and collaboration policy can be found in the course syllabus.
Please attempt all problems. {\bf Please turn in your solutions via
Gradescope, by 1pm Los Angeles time on the due date.}
\begin{enumerate}
\item Define $\widetilde{\mbox{\bf ZPP}}$ to be the class of all
languages decided by a probabilistic Turing Machine running in
{\em expected} polynomial time. That is, for every language $L \in
\widetilde{\mbox{\bf ZPP}}$ there is a probabilistic Turing
Machine $M$ (with two read-only tapes --- the first tape containing the input, and the second tape containing a random bit in every tape square) with the following behavior: on input $x \in L$,
$M$ always accepts, on input $x \not\in L$, $M$ always rejects,
and for every input $x$,
\[\mbox{E}[\mbox{\# steps before $M$ halts}] = |x|^{O(1)}.\]
Show that $\widetilde{\mbox{\bf ZPP}} = \mbox{\bf
ZPP}$.
\item List-decoding of the binary Hadamard code. Throughout this
problem $\F_2$ is the field with $2$ elements (addition and
multiplication are performed modulo 2). Given a $k$-bit message
$m$, the associated Hadamard codeword $C(m)$ is described by first
producing a linear multivariate polynomial $p_m(x_0, x_1, \ldots,
x_{k-1}) = \sum_{i = 0}^{k-1}m_ix_i$, and then evaluating that
polynomial at all vectors in the space $\F_2^k$: $C(m) =
(p_m(w))_{w \in \F_2^k}$. Thus the codeword has $n = 2^k$ bits,
and the $w$-th bit is the inner product mod 2 of the $k$-bit vectors
$m$ and $w$. The bits of a codeword $C = C(m)$ are naturally indexed by $\F_2^k$; we write $C_w$ (with $w \in \F_2^k$) to mean the $w$-th coordinate, which is just $p_m(w)$.
Since the distance of the Hadamard code is $(1/2)n$ (by
Schwartz-Zippel), unique decoding is only possible from up to
$(1/4)n$ errors. In this problem you will show that efficient {\em
list-decoding} is possible from a received word $R$ that has
suffered up to $(1/2 - \epsilon)n$ errors.
You may need to use Chebyshev's Inequality: for a random variable
$X$ and any $a > 0$,
\[\Pr[|X - \mbox{E}[X]| \ge a] \le \frac{\mbox{Var}[X]}{a^2}.\]
\begin{enumerate}
\item Consider the following probabilistic procedure. Pick $\ell$
vectors $v_1, v_2, \ldots, v_{\ell} \in \F_2^k$ independently and
uniformly at random. For a subset $S \subseteq \{1,2,3, \ldots,
\ell\}$ define $u_S = \sum_{i \in S} v_i$. Show that for every
non-empty set $S$ and every $\alpha \in \F_2^k$, we have $\Pr[u_S
= \alpha] = 2^{-k}$. Then show that for every pair of non-empty
subsets $S$ and $T$ with $S \ne T$, and every $\alpha, \beta \in
\F_2^k$, we have:
\[\Pr[u_S = \alpha \land u_T = \beta] = \Pr[u_S = \alpha]\Pr[u_T = \beta].\]
In other words, the vectors $u_S$ (for non-empty $S$) are pairwise independent
random variables, each uniformly distributed on $\F_2^k$.
\item Suppose that the received word $R$ agrees with $C = C(m)$ in
at least a $1/2 + \epsilon$ fraction of its $n$ bits. Verify for
yourself that $C_w + C_{w + e_i} = m_i$ (here $e_i$ is the $i$-th
elementary vector in $\F_2^k$---the vector with 1 in the $i$-th
position and zeros elsewhere). Of course, in our decoding
algorithm we do not have access to $C$ to find $C_w$ and $C_{w +
e_i}$. So, we will replace $C_w$ with a ``guess'' (for now imagine
it is always correct), and $C_{w + e_i}$ with $R_{w + e_i}$ (which
may or may not be correct). Show that for all $i$:
\[\Pr\left[ \left |\{S \ne \emptyset : C_{u_S} + R_{u_S + e_i} = m_i\}\right |
\le \frac{2^\ell - 1}{2}\right] \le \frac{1}{4\epsilon^2(2^\ell -1)}.\]
Hint: define indicator random variables for the event $R_{u_S
+ e_i} = C_{u_S + e_i}$. Use the fact that for pairwise
independent random variables, the variance of the sum is the sum
of the variances.
\item Describe a probabilistic procedure $A$ that has the
following behavior:
\begin{itemize}
\item it has random access to a word $R$ that agrees with $C =
C(m)$ in at least a $1/2 + \epsilon$ fraction of its $n$ bits, and
\item it runs in time $\mbox{poly}(k, \epsilon^{-1})$, and
\item with probability at least $3/4$ it outputs a list of $L =
O(k\epsilon^{-2})$ ``candidate messages'' $m_1, m_2, \ldots, m_L$
that includes the original message $m$.
\end{itemize}
Hint: argue that it takes surprisingly few bits to describe
$(C_{u_S})_{S \ne \emptyset}$.
\item Prove the Goldreich-Levin Theorem: for every function family
$f_n:\{0,1\}^n \rightarrow \{0,1\}^n$, \[GL(x, y) = \sum_{i=1}^n
x_iy_i \bmod 2\] is a {\em hard bit} for the function family
$f'_{n}:\{0,1\}^n \times \{0,1\}^n \rightarrow \{0,1\}^{2n}$
defined by $f'(x,y) = (f(x), y)$. That is, if there is a circuit
family $\{C_n\}$ of size $s(n) \ge n$ that achieves:
\begin{equation}\Pr_{x,y}[C_n(x,y) = GL(f'_n(x,y))] \ge 1/2 +
\epsilon(n)\label{eq}\end{equation} then there is a circuit family
$\{C'_n\}$ of size $s'(n)$ that achieves:
\[\Pr_{x,y}[C'_n(x,y) = f'_n(x,y)] \ge \epsilon'(n),\]
with $s'(n) = (s(n)/\epsilon(n))^{O(1)}$ and $\epsilon'(n) =
(\epsilon(n)/n)^{O(1)}$. Hint: It may be useful to observe that by
an averaging argument, (\ref{eq}) implies that for at least an
$\epsilon(n)/2$ fraction of the $x$'s:
\[\Pr_{y}[C_n(x,y) = GL(f'_n(x,y))] \ge 1/2 + \epsilon(n)/2.\]
\end{enumerate}
\newpage
\item List-decoding of Reed-Solomon codes. Throughout this problem
$\F_q$ is the field with $q$ elements. Given a message $m = (m_0,
m_1, \ldots, m_{k-1}) \in \F_q^k$, the associated Reed-Solomon codeword $C(m)$ is described by
first producing a polynomial of degree at most $k-1$, $p_m(x) = \sum_{i =
0}^{k-1}m_ix^i$, and then evaluating that polynomial at all of the
elements in the field $\F_q$: $C(m) = (p_m(w))_{w \in \F_q}$. You
should think of $q$ as being polynomial in $k$.
Now, we are given a received word $R$ that has suffered $e$
errors. We know that if $e > q/2$ unique decoding is impossible,
since the distance of this code is {\em a priori} at most $q$. In
this problem you will show that efficient {\em list-decoding} is
possible when $e$ is as large as $q - \sqrt{2kq}$. If we choose,
say, $q = k^2$, then this implies that we can recover from up to a
$1 - o(1)$ fraction of errors!
You may need the following fact about polynomials: $(x - \alpha)$
divides a polynomial $p(x)$ iff $\alpha$ is a root of $p$.
\begin{enumerate}
\item We view the received word $R$ as a function $R:\F_q
\rightarrow \F_q$. Show that one can efficiently find a polynomial
$Q(x, y) \not\equiv 0$ with $x$-degree at most $\sqrt{q}$ and
$y$-degree at most $\sqrt{q}$ for which $Q(w, R(w)) = 0$ for all
$w \in \F_q$. Hint: elementary linear algebra is sufficient here.
\item Let $p:\F_q \rightarrow \F_q$ be a polynomial of degree at
most $k-1$ for which $|\{w \in \F_q : p(w) \ne R(w) \}| \le q - t$.
That is, $p$ is a RS codeword that agrees with the received word
$R$ in at least $t$ locations. Show that if $t > k\sqrt{q}$ then
$(y - p(x))$ divides $Q(x, y)$. (Hint: view $Q$ as a {\em
univariate} polynomial in $y$ with coefficients in $\F_q[x]$.)
Conclude that using an efficient algorithm for factoring
multivariate polynomials (which is known), we can find all
codewords which have agreement $t > k\sqrt{q}$ with $R$.
\item The $(1, k-1)$-weighted degree of a polynomial $Q(x, y) =
\sum_{i,j}q_{i,j}x^iy^j$ is defined to be $\max\{i + (k-1)j :
q_{i,j} \ne 0\}$. Refine your analysis in parts (a) and (b) to
show that one can find $Q(x, y)$ with the required properties {\em
and with $(1, k-1)$-weighted degree at most $\sqrt{2kq}$}, and
therefore can tolerate agreement only $t
> \sqrt{2kq}$.
\end{enumerate}
\item List decoding of Parvaresh-Vardy codes. The setting is
similar to the one in the previous problem. Throughout this
problem $\F_q$ is the field with $q$ elements. We have an integer
parameter $c \ge 1$, and we fix a polynomial $E(x)$ of degree $k$
that is irreducible over $\F_q$. Set $h = \lceil (q+1)^{1/(c+1)}
\rceil$.
Given a message $m = (m_0, m_1, \ldots, m_{k-1}) \in \F_q^k$, the associated Parvaresh-Vardy
codeword $C(m)$ is described as follows:
\begin{itemize}
\item produce the degree $k-1$ polynomial $p_m(x) = \sum_{i =
0}^{k-1}m_ix^i$
\item define $p_{m}^{(i)}(x)$ to be the polynomial
\[(p_m(x))^{h^i} \bmod E(x),\] for $i = 0,1,2,\ldots, c-1$. Note
that $p_m^{(0)}(x)$ is just $p_m(x)$.
\item the codeword $C(m)$ is the following sequence of symbols in
$\F_q^c$: \[\left ([p_m^{(0)}(w), p_m^{(1)}(w), \ldots,
p_m^{(c-1)}(w)]\right )_{w \in \F_q}.\]
\end{itemize}
Now, we are given a received word $R$ that has suffered $e$
errors. You will show that {\em list-decoding} is possible for
Parvaresh-Vardy codes when $e$ is as large as $q - (c+1)kh$. Note,
for example, that when taking $c = \log q$, we require agreement
only $\approx k\log q$, in contrast to the $\approx\sqrt{kq}$
achievable with Reed-Solomon codes (typically, $q$ is a large
polynomial in $k$).
\begin{enumerate}
\item We view the received word $R$ as a function $R:\F_q
\rightarrow \F_q^c$. Show that one can efficiently find a
polynomial $Q(x, y_0, y_1, \ldots, y_{c-1}) \not\equiv 0$ with
individual variable degrees at most $h-1$ for which $Q(w, R(w)_0,
\ldots, R(w)_{c-1}) = 0$ for all $w \in \F_q$. Hint: elementary
linear algebra is sufficient here.
\item Let $p:\F_q \rightarrow \F_q$ be a polynomial of degree at
most $k-1$ for which
\[|\{w \in \F_q : [p^{(0)}(w), p^{(1)}(w), \ldots, p^{(c-1)}(w)] \ne R(w) \}| \le q - t.\]
That is, $p$ is a Parvaresh-Vardy codeword that agrees with the
received word $R$ in at least $t$ locations. Show that if $t >
(c+1)kh$ then the polynomial
\[Q(x, p^{(0)}(x), p^{(1)}(x), \ldots, p^{(c-1)}(x))\]
is the zero polynomial.
\item Recall that $\F_q[x]/E(x)$---the set of univariate
polynomials with coefficients in $\F_q$ and with multiplication
and addition performed modulo $E(x)$---is a field. Define the
following polynomial with coefficients in this field:
\[Q^*(z) = Q(x, z, z^h, z^{h^2}, \ldots, z^{h^{c-1}}) \bmod E(x).\]
Assuming the existence of an efficient algorithm that outputs all
of the roots of a given univariate polynomial with coefficients in
$\F_q[x]/E(x)$, and making reference to $Q^*$, argue that one can
efficiently find all Parvaresh-Vardy codewords which have
agreement $t > (c+1)kh$ with $R$.
\end{enumerate}
\end{enumerate}
\end{document}