チュートリアル 3: 同時フィッティング／回帰

第3週5日目: ネットワーク因果性

Neuromatch Academyによる

コンテンツ作成者: Ari Benjamin, Tony Liu, Konrad Kording

コンテンツレビュアー: Mike X Cohen, Madineh Sarvestani, Yoni Friedman, Ella Batty, Michael Waskom

制作編集者: Gagana B, Spiros Chavlis

チュートリアルの目的

チュートリアルの推定所要時間: 20分

これは因果性を調べる日のチュートリアル3です。以下は本日扱う内容の大まかな概要で、このノートブックで重点的に扱うセクションは太字にしています：

因果性の定義をマスターする
因果性の推定が可能であることを理解する
4つの異なる方法を学び、それらが失敗する場合を理解する

介入（perturbations）
相関
同時フィッティング／回帰
器具変数（instrumental variables）

チュートリアル3の目的

チュートリアル2では、因果の近似として相関を探り、より大きなネットワークでは相関 $\neq$ 因果であることを学びました。しかし、相関の計算は比較的単純なアプローチであり、より洗練された手法で因果性をより正確に推定できるのか疑問に思うかもしれません。制御はできないのでしょうか？

ここでは、観察データから因果性を推定する一般的な高度（しかし議論のある）手法を使います。これらの手法は、介入や相関を使おうとするのではなく、データに直接関数をフィットさせることに依存しています。システムの完全な閉形式の方程式があるので、これらの手法を試して、介入がない場合に因果的結合性をどれだけうまく推定できるかを見てみます。具体的には、

因果性推定のためのより高度（しかし議論のある）手法について学ぶ
- 条件付き確率（回帰）
制限や失敗モードを探る
- 省略変数バイアスの問題を理解する

# @title Tutorial slides
# @markdown These are the slides for the videos in all tutorials today
from IPython.display import IFrame
link_id = "gp4m9"
print(f"If you want to download the slides: https://osf.io/download/{link_id}/")
IFrame(src=f"https://mfr.ca-1.osf.io/render?url=https://osf.io/{link_id}/?direct%26mode=render%26action=download%26mode=render", width=854, height=480)

セットアップ

# @title Install and import feedback gadget


from vibecheck import DatatopsContentReviewContainer
def content_review(notebook_section: str):
    return DatatopsContentReviewContainer(
        "",  # No text prompt
        notebook_section,
        {
            "url": "https://pmyvdlilci.execute-api.us-east-1.amazonaws.com/klab",
            "name": "neuromatch_cn",
            "user_key": "y1x3mpx5",
        },
    ).render()


feedback_prefix = "W3D5_T3"

# Imports
import numpy as np
import matplotlib.pyplot as plt

from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Lasso

# @title Figure Settings
import logging
logging.getLogger('matplotlib.font_manager').disabled = True

import ipywidgets as widgets  # interactive display
%config InlineBackend.figure_format = 'retina'
plt.style.use("https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle")

# @title Plotting Functions

def see_neurons(A, ax, ratio_observed=1, arrows=True, show=False):
  """
  Visualizes the connectivity matrix.

  Args:
      A (np.ndarray): the connectivity matrix of shape (n_neurons, n_neurons)
      ax (plt.axis): the matplotlib axis to display on

  Returns:
      Nothing, but visualizes A.
  """
  n = len(A)

  ax.set_aspect('equal')
  thetas = np.linspace(0, np.pi * 2, n, endpoint=False)
  x, y = np.cos(thetas), np.sin(thetas),
  if arrows:
    for i in range(n):
      for j in range(n):
        if A[i, j] > 0:
          ax.arrow(x[i], y[i], x[j] - x[i], y[j] - y[i], color='k',
                   head_width=.05, width = A[i, j] / 25, shape='right',
                   length_includes_head=True, alpha=.2)
  if ratio_observed < 1:
    nn = int(n * ratio_observed)
    ax.scatter(x[:nn], y[:nn], c='r', s=150, label='Observed')
    ax.scatter(x[nn:], y[nn:], c='b', s=150, label='Unobserved')
    ax.legend(fontsize=15)
  else:
    ax.scatter(x, y, c='k', s=150)
  ax.axis('off')
  if show:
    plt.show()


def plot_connectivity_matrix(A, ax=None):
  """Plot the (weighted) connectivity matrix A as a heatmap

    Args:
      A (ndarray): connectivity matrix (n_neurons by n_neurons)
      ax: axis on which to display connectivity matrix
  """
  if ax is None:
    ax = plt.gca()
  lim = np.abs(A).max()
  ax.imshow(A, vmin=-lim, vmax=lim, cmap="coolwarm")
  plt.show()

# @title Helper Functions

def sigmoid(x):
  """
  Compute sigmoid nonlinearity element-wise on x.

  Args:
    x (np.ndarray): the numpy data array we want to transform
  Returns
    (np.ndarray): x with sigmoid nonlinearity applied
  """
  return 1 / (1 + np.exp(-x))


def create_connectivity(n_neurons, random_state=42, p=0.9):
  """
  Generate our nxn causal connectivity matrix.

  Args:
    n_neurons (int): the number of neurons in our system.
    random_state (int): random seed for reproducibility

  Returns:
    A (np.ndarray): our 0.1 sparse connectivity matrix
  """
  np.random.seed(random_state)
  A_0 = np.random.choice([0, 1], size=(n_neurons, n_neurons), p=[p, 1 - p])

  # set the timescale of the dynamical system to about 100 steps
  _, s_vals, _ = np.linalg.svd(A_0)
  if s_vals[0] != 0 and not np.isnan(s_vals[0]):
    A = A_0 / (1.01 * s_vals[0])
  else:
    eps = 1e-12
    A = eps*np.ones_like(A_0)  # if denominator is zero, set A to a small value

  return A


def simulate_neurons(A, timesteps, random_state=42):
  """
  Simulates a dynamical system for the specified number of neurons and timesteps.

  Args:
    A (np.array): the connectivity matrix
    timesteps (int): the number of timesteps to simulate our system.
    random_state (int): random seed for reproducibility

  Returns:
    X has shape (n_neurons, timeteps).
  """
  np.random.seed(random_state)


  n_neurons = len(A)
  X = np.zeros((n_neurons, timesteps))

  for t in range(timesteps - 1):
    epsilon = np.random.multivariate_normal(np.zeros(n_neurons),
                                            np.eye(n_neurons))
    X[:, t + 1] = sigmoid(A.dot(X[:, t]) + epsilon)

    assert epsilon.shape == (n_neurons, )
  return X


def get_sys_corr(n_neurons, timesteps, random_state=42, neuron_idx=None):
  """
  A wrapper function for our correlation calculations between A and R.

  Args:
    n_neurons (int): the number of neurons in our system.
    timesteps (int): the number of timesteps to simulate our system.
    random_state (int): seed for reproducibility
    neuron_idx (int): optionally provide a neuron idx to slice out

  Returns:
    A single float correlation value representing the similarity between A and R
  """

  A = create_connectivity(n_neurons, random_state)
  X = simulate_neurons(A, timesteps)

  R = correlation_for_all_neurons(X)

  return np.corrcoef(A.flatten(), R.flatten())[0, 1]


def correlation_for_all_neurons(X):
  """
  Computes the connectivity matrix for the all neurons using correlations

  Args:
    X: the matrix of activities

  Returns:
    estimated_connectivity (np.ndarray): estimated connectivity for the
                                         selected neuron, of shape (n_neurons,)
  """
  n_neurons = len(X)
  S = np.concatenate([X[:, 1:], X[:, :-1]], axis=0)
  R = np.corrcoef(S)[:n_neurons, n_neurons:]
  return R

上記で定義されたヘルパー関数は以下の通りです：

sigmoid: Tutorial 1からの入力に要素ごとにシグモイド非線形変換を計算する関数
create_connectivity: Tutorial 1からのnxnの因果結合行列を生成する関数
simulate_neurons: Tutorial 1から指定されたニューロン数とタイムステップ数で動的システムをシミュレートする関数
get_sys_corr: Tutorial 2からAとR間の相関計算のラッパー関数
correlation_for_all_neurons: Tutorial 2から相関を用いて全ニューロンの結合行列を計算する関数

セクション1: 回帰：モデルフィッティングによる結合性の回復

# @title Video 1: Regression approach
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
  def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
    self.id = id
    if source == 'Bilibili':
      src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
    elif source == 'Osf':
      src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
    super(PlayVideo, self).__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
  tab_contents = []
  for i, video_id in enumerate(video_ids):
    out = widgets.Output()
    with out:
      if video_ids[i][0] == 'Youtube':
        video = YouTubeVideo(id=video_ids[i][1], width=W,
                             height=H, fs=fs, rel=0)
        print(f'Video available at https://youtube.com/watch?v={video.id}')
      else:
        video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,
                          height=H, fs=fs, autoplay=False)
        if video_ids[i][0] == 'Bilibili':
          print(f'Video available at https://www.bilibili.com/video/{video.id}')
        elif video_ids[i][0] == 'Osf':
          print(f'Video available at https://osf.io/{video.id}')
      display(video)
    tab_contents.append(out)
  return tab_contents


video_ids = [('Youtube', 'Av4LaXZdgDo'), ('Bilibili', 'BV1m54y1q78b')]
tab_contents = display_videos(video_ids, W=854, H=480)
tabs = widgets.Tab()
tabs.children = tab_contents
for i in range(len(tab_contents)):
  tabs.set_title(i, video_ids[i][0])
display(tabs)

# @title Submit your feedback
content_review(f"{feedback_prefix}_Regression_approach_Video")

相関は隠れた交絡因子がない場合にのみ因果を示唆する、という考えに馴染みがあるかもしれません。これは、相関が因果を示すのは、他の変数が相関を説明できない場合に限るという直感と一致します。

交絡の例：
睡眠時間が長い人ほど学校の成績が良いという相関を観察したとします。良い相関ですね。しかし他に説明できることは？睡眠時間が長い人は裕福で、アルバイトをしておらず、宿題をする時間があるのかもしれません。睡眠が成績を因果的に良くしているかを相関で答えたいなら、すべての可能な交絡因子を制御しなければなりません。

交絡因子とは、結果と元の共変量の両方に影響を与える変数のことです。例では、睡眠と成績の両方に影響を与えるものが交絡因子です。

交絡因子の制御：
交絡因子は回帰に共変量として加えることで制御できます。しかし、係数が因果効果であるためには3つの条件が必要です：

すべての交絡因子が共変量として含まれていること
回帰が共変量と結果の関係の数学的形を仮定していること（線形、GLMなど）
共変量が処置（元の変数）と結果の両方によって原因されていないこと。これらはコライダー$と呼ばれます。今日は紹介しませんが（ご自身で調べてみてください！コライダーは非常に直感に反します。）

現実世界ではこれらの条件を満たすことは非常に難しいです。脳ではさらに難しい（すべてのニューロンを測定できないため）。幸い今日は自分たちでシステムをシミュレートしました。

# @title Video 2: Fitting a GLM
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
  def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
    self.id = id
    if source == 'Bilibili':
      src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
    elif source == 'Osf':
      src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
    super(PlayVideo, self).__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
  tab_contents = []
  for i, video_id in enumerate(video_ids):
    out = widgets.Output()
    with out:
      if video_ids[i][0] == 'Youtube':
        video = YouTubeVideo(id=video_ids[i][1], width=W,
                             height=H, fs=fs, rel=0)
        print(f'Video available at https://youtube.com/watch?v={video.id}')
      else:
        video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,
                          height=H, fs=fs, autoplay=False)
        if video_ids[i][0] == 'Bilibili':
          print(f'Video available at https://www.bilibili.com/video/{video.id}')
        elif video_ids[i][0] == 'Osf':
          print(f'Video available at https://osf.io/{video.id}')
      display(video)
    tab_contents.append(out)
  return tab_contents


video_ids = [('Youtube', 'GvMj9hRv5Ak'), ('Bilibili', 'BV16p4y1S7yE')]
tab_contents = display_videos(video_ids, W=854, H=480)
tabs = widgets.Tab()
tabs.children = tab_contents
for i in range(len(tab_contents)):
  tabs.set_title(i, video_ids[i][0])
display(tabs)

# @title Submit your feedback
content_review(f"{feedback_prefix}_Fitting_a_GLM_Video")

我々のシステムでは各ニューロンが以下のように他のすべてのニューロンに影響を与えます：

\vec{x}_{t+1} = \sigma(A\vec{x}_t + \epsilon_t),

ここで $\sigma$ は前回のシグモイド非線形関数： $\sigma(x) = \frac{1}{1 + e^{-x}}$

我々のシステムは閉じたシステムなので、省略変数はありません。回帰係数は因果効果であるはずです。果たしてそうでしょうか？

我々は回帰アプローチを用いて、ニューロン#1に対するすべてのニューロンの因果影響を推定します。具体的には、以下の線形回帰で $A$ を決定します：

\sigma^{-1}(\vec{x}_{t+1}) = A\vec{x}_t + \epsilon_t ,

ここで $\sigma^{-1}$ はシグモイドの逆変換で、ロジット変換とも呼ばれます： $\sigma^{-1}(x) = \log(\frac{x}{1-x})$ 。

$W$ を $\vec{x}_t$ の値で、最後から2番目のタイムステップ $T-1$ までの行列とします：

W = \begin{bmatrix} \mid & \mid & ... & \mid \\ \vec{x}_0 & \vec{x}_1 & ... & \vec{x}_{T-1} \\ \mid & \mid & ... & \mid \end{bmatrix}_{n \times (T-1)}

$Y$ を選択したニューロン $i$ の $\vec{x}_{t+1}$ の値で、2番目のタイムステップから最後のタイムステップ $T$ までの行ベクトルとします：

Y = \begin{bmatrix} x_{i,1} & x_{i,2} & ... & x_{i, T} \\ \end{bmatrix}_{1 \times (T-1)}

次に以下のモデルをフィットします：

\sigma^{-1}(Y^T) = W^\top V

ここで $V$ はこの回帰の $n \times 1$ の係数行列で、選択したニューロンと他のニューロン間の推定結合行列になります。

復習: 第1週で学んだように、ラッソすなわち** $L_1$ 正則化**は係数をスパースにし、ほとんどがゼロになります。なぜここでそれが望ましいか考えてみてください。

コーディング演習1: 線形回帰とラッソを使って因果結合性を推定する

これから上記の回帰モデルと $V$ をフィットする関数を作成します。その後、この関数を呼び出して回帰と相関のどちらが真の因果性に近いかを調べます。

コード:

ここで $Y$ と $W$ の両方を転置していることに気づくでしょう。なぜでしょう？

scikit-learnの機械学習モデルは、入力データの行が観測値、列が変数であることを期待しています。しかし我々の $Y$ と $W$ の定義では、システムのタイムステップ（観測）が列になっているため、両方の行列を転置してscikit-learnに適した行列の向きにしています。

scikit-learnの抽象化のおかげで、この回帰のフィッティングは Lasso() 推定器の初期化と fit() 関数の呼び出しだけです
Lasso 推定器のハイパーパラメータは以下を使います：
- alpha = 0.01
- fit_intercept = False
フィットしたモデルから $V$ はどうやって得る？

ヘルパー関数 logit を使います。

# Set parameters
n_neurons = 50  # the size of our system
timesteps = 10000  # the number of timesteps to take
random_state = 42
neuron_idx = 1

# Set up system and simulate
A = create_connectivity(n_neurons, random_state)
X = simulate_neurons(A, timesteps)

# @markdown Execute this cell to enable helper function `logit`

def logit(x):
  """
  Applies the logit (inverse sigmoid) transformation

  Args:
    x (np.ndarray): the numpy data array we want to transform
  Returns
    (np.ndarray): x with logit nonlinearity applied
  """
  return np.log(x/(1-x))

def get_regression_estimate(X, neuron_idx):
  """
  Estimates the connectivity matrix using lasso regression.

  Args:
    X (np.ndarray): our simulated system of shape (n_neurons, timesteps)
    neuron_idx (int):  a neuron index to compute connectivity for

  Returns:
    V (np.ndarray): estimated connectivity matrix of shape (n_neurons, n_neurons).
                    if neuron_idx is specified, V is of shape (n_neurons,).
  """
  # Extract Y and W as defined above
  W = X[:, :-1].transpose()
  Y = X[[neuron_idx], 1:].transpose()

  # Apply inverse sigmoid transformation
  Y = logit(Y)

  ############################################################################
  ## TODO: Insert your code here to fit a regressor with Lasso. Lasso captures
  ## our assumption that most connections are precisely 0.
  ## Fill in function and remove
  raise NotImplementedError("Please complete the regression exercise")
  ############################################################################

  # Initialize regression model with no intercept and alpha=0.01
  regression = ...

  # Fit regression to the data
  regression.fit(...)

  V = regression.coef_

  return V


# Estimate causality with regression
V = get_regression_estimate(X, neuron_idx)

print(f"Regression: correlation of estimated with true connectivity: {np.corrcoef(A[neuron_idx, :], V)[1, 0]:.3f}")
print(f"Lagged correlation of estimated with true connectivity: {get_sys_corr(n_neurons, timesteps, random_state, neuron_idx=neuron_idx):.3f}")

以下のような結果が得られるはずです：

Regression: correlation of estimated connectivity with true connectivity: 0.865
Lagged correlation of estimated connectivity with true connectivity: 0.703

解答を見る$

# @title Submit your feedback
content_review(f"{feedback_prefix}_Linear_regression_with_lasso_to_estimate_causal_connectivities_Exercise")

これらの数値から、重回帰は単純な相関よりも結合性の推定に優れていることがわかります。

セクション2: 部分的に観測されたシステム

チュートリアル開始からここまでの推定所要時間: 10分

システム全体を観測できない場合、省略変数バイアスが問題になります。すべてのニューロンを観測できず制御できない場合でも、因果効果を正確に推定できるでしょうか？

# @title Video 3: Omitted variable bias
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
  def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
    self.id = id
    if source == 'Bilibili':
      src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
    elif source == 'Osf':
      src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
    super(PlayVideo, self).__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
  tab_contents = []
  for i, video_id in enumerate(video_ids):
    out = widgets.Output()
    with out:
      if video_ids[i][0] == 'Youtube':
        video = YouTubeVideo(id=video_ids[i][1], width=W,
                             height=H, fs=fs, rel=0)
        print(f'Video available at https://youtube.com/watch?v={video.id}')
      else:
        video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,
                          height=H, fs=fs, autoplay=False)
        if video_ids[i][0] == 'Bilibili':
          print(f'Video available at https://www.bilibili.com/video/{video.id}')
        elif video_ids[i][0] == 'Osf':
          print(f'Video available at https://osf.io/{video.id}')
      display(video)
    tab_contents.append(out)
  return tab_contents


video_ids = [('Youtube', '5CCib6CTMac'), ('Bilibili', 'BV1ov411i7dc')]
tab_contents = display_videos(video_ids, W=854, H=480)
tabs = widgets.Tab()
tabs.children = tab_contents
for i in range(len(tab_contents)):
  tabs.set_title(i, video_ids[i][0])
display(tabs)

# @title Submit your feedback
content_review(f"{feedback_prefix}_Omitted_variable_bias_Video")

ビデオの訂正: ビデオ内の「connectivity from」/「connectivity to」のラベルが逆になっていますが、以下の図やデモでは修正済みです

まず、観測するニューロンの割合が75%の場合と25%の場合で、結合行列の異なる部分集合を可視化します。

結合行列のエントリの意味を思い出してください： $A[i,j] = 1$ はニューロン $i$ からニューロン $j$ への結合が強さ1であることを意味します。

# @markdown Execute this cell to visualize subsets of connectivity matrix

# Run this cell to visualize the subsets of variables we observe
n_neurons = 25
A = create_connectivity(n_neurons)

fig, axs = plt.subplots(2, 2, figsize=(10, 10))
ratio_observed = [0.75, 0.25]  # the proportion of neurons observed in our system

for i, ratio in enumerate(ratio_observed):
  sel_idx = int(n_neurons * ratio)

  offset = np.zeros((n_neurons, n_neurons))
  axs[i, 1].title.set_text(f"{int(ratio * 100)}% neurons observed")
  offset[:sel_idx, :sel_idx] =  1 + A[:sel_idx, :sel_idx]
  im = axs[i, 1].imshow(offset, cmap="coolwarm", vmin=0, vmax=A.max() + 1)
  axs[i, 1].set_xlabel("Connectivity from")
  axs[i, 1].set_ylabel("Connectivity to")
  plt.colorbar(im, ax=axs[i, 1], fraction=0.046, pad=0.04)
  see_neurons(A, axs[i, 0], ratio)

plt.suptitle("Visualizing subsets of the connectivity matrix")
plt.tight_layout()
plt.show()

インタラクティブデモ3: 観測ニューロン数に応じた回帰性能

まずネットワークで観測するニューロン数を変えて、推定された結合性をこのインタラクティブデモで調べます。推定された結合性はどう変わるでしょうか？

# @markdown Execute this cell to get helper functions `get_regression_estimate_full_connectivity` and `get_regression_corr_full_connectivity`

def get_regression_estimate_full_connectivity(X):
  """
  Estimates the connectivity matrix using lasso regression.

  Args:
    X (np.ndarray): our simulated system of shape (n_neurons, timesteps)
    neuron_idx (int): optionally provide a neuron idx to compute connectivity for
  Returns:
    V (np.ndarray): estimated connectivity matrix of shape (n_neurons, n_neurons).
                    if neuron_idx is specified, V is of shape (n_neurons,).
  """
  n_neurons = X.shape[0]

  # Extract Y and W as defined above
  W = X[:, :-1].transpose()
  Y = X[:, 1:].transpose()

  # apply inverse sigmoid transformation
  Y = logit(Y)

  # fit multioutput regression
  reg = MultiOutputRegressor(Lasso(fit_intercept=False,
                                   alpha=0.01, max_iter=250),
                             n_jobs=-1)
  reg.fit(W, Y)

  V = np.zeros((n_neurons, n_neurons))
  for i, estimator in enumerate(reg.estimators_):
    V[i, :] = estimator.coef_

  return V


def get_regression_corr_full_connectivity(n_neurons, A, X, observed_ratio,
                                          regression_args):
    """
    A wrapper function for our correlation calculations between A and the V estimated
    from regression.

    Args:
      n_neurons (int): number of neurons
      A (np.ndarray): connectivity matrix
      X (np.ndarray): dynamical system
      observed_ratio (float): the proportion of n_neurons observed, must be between 0 and 1.
      regression_args (dict): dictionary of lasso regression arguments and hyperparameters

    Returns:
      A single float correlation value representing the similarity between A and R
    """
    assert (observed_ratio > 0) and (observed_ratio <= 1)

    sel_idx = np.clip(int(n_neurons*observed_ratio), 1, n_neurons)

    sel_X = X[:sel_idx, :]
    sel_A = A[:sel_idx, :sel_idx]

    sel_V = get_regression_estimate_full_connectivity(sel_X)
    return np.corrcoef(sel_A.flatten(), sel_V.flatten())[1, 0], sel_V

# @markdown Execute this cell to enable the widgets

# @markdown Note: The plots will take a few seconds to update after moving the slider.

n_neurons = 50
A = create_connectivity(n_neurons, random_state=42)
X = simulate_neurons(A, 4000, random_state=42)

reg_args = {
    "fit_intercept": False,
    "alpha": 0.001
}

@widgets.interact(n_observed=widgets.IntSlider(min=5, max=45, step=5,
                                               continuous_update=False))
def plot_observed(n_observed):
  to_neuron = 0
  fig, axs = plt.subplots(1, 3, figsize=(15, 5))
  sel_idx = n_observed
  ratio = (n_observed) / n_neurons
  offset = np.zeros((n_neurons, n_neurons))
  axs[0].title.set_text(f"{int(ratio * 100)}% neurons observed")
  offset[:sel_idx, :sel_idx] =  1 + A[:sel_idx, :sel_idx]
  im = axs[1].imshow(offset, cmap="coolwarm", vmin=0, vmax=A.max() + 1)
  plt.colorbar(im, ax=axs[1], fraction=0.046, pad=0.04)

  see_neurons(A,axs[0], ratio, False)
  corr, R = get_regression_corr_full_connectivity(n_neurons, A, X,
                                                  ratio, reg_args)
  big_R = np.zeros(A.shape)
  big_R[:sel_idx, :sel_idx] =  1 + R
  im = axs[2].imshow(big_R, cmap="coolwarm", vmin=0, vmax=A.max() + 1)
  plt.colorbar(im, ax=axs[2],fraction=0.046, pad=0.04)
  c = 'w' if n_observed<(n_neurons-3) else 'k'
  axs[2].text(0, n_observed + 3, f"Correlation: {corr:.2f}", color=c, size=15)
  axs[1].title.set_text("True connectivity")
  axs[1].set_xlabel("Connectivity from")
  axs[1].set_ylabel("Connectivity to")
  axs[2].title.set_text("Estimated connectivity")
  axs[2].set_xlabel("Connectivity from")
  plt.show()

次に、複数の試行で真の結合行列と推定結合行列の相関を、観測ニューロンの割合に対してプロットします。
パフォーマンスと観測ニューロン数の関係はどうなっていますか？

注意: 以下のセルは実行に約25～30秒かかります。

# @markdown Plot correlation vs. subsampling

# we'll simulate many systems for various ratios of observed neurons
n_neurons = 50
timesteps = 5000
ratio_observed = [1, 0.75, 0.5, .25, .125]  # the proportion of neurons observed in our system
n_trials = 3  # run it this many times to get variability in our results

reg_args = {
    "fit_intercept": False,
    "alpha": 0.001
}

corr_data = np.zeros((n_trials, len(ratio_observed)))
for trial in range(n_trials):

  A = create_connectivity(n_neurons, random_state=trial)
  X = simulate_neurons(A, timesteps)
  print(f"simulating trial {trial + 1} of {n_trials}")

  for j, ratio in enumerate(ratio_observed):
    result,_ = get_regression_corr_full_connectivity(n_neurons, A, X,
                                                     ratio, reg_args)
    corr_data[trial, j] = result

corr_mean = np.nanmean(corr_data, axis=0)
corr_std = np.nanstd(corr_data, axis=0)

plt.plot(np.asarray(ratio_observed) * 100, corr_mean)
plt.fill_between(np.asarray(ratio_observed) * 100,
                 corr_mean - corr_std, corr_mean + corr_std,
                 alpha=.2)
plt.xlim([100, 10])
plt.xlabel("Percent of neurons observed")
plt.ylabel("connectivity matrices correlation")
plt.title("Performance of regression\nas a function of the number of neurons observed")
plt.show()

# @title Submit your feedback
content_review(f"{feedback_prefix}_Regression_performance_as_a_function_of_the_number_of_observed_neurons_Interactive_Demo")

まとめ

チュートリアルの推定所要時間: 20分

# @title Video 4: Summary
from ipywidgets import widgets
from IPython.display import YouTubeVideo
from IPython.display import IFrame
from IPython.display import display


class PlayVideo(IFrame):
  def __init__(self, id, source, page=1, width=400, height=300, **kwargs):
    self.id = id
    if source == 'Bilibili':
      src = f'https://player.bilibili.com/player.html?bvid={id}&page={page}'
    elif source == 'Osf':
      src = f'https://mfr.ca-1.osf.io/render?url=https://osf.io/download/{id}/?direct%26mode=render'
    super(PlayVideo, self).__init__(src, width, height, **kwargs)


def display_videos(video_ids, W=400, H=300, fs=1):
  tab_contents = []
  for i, video_id in enumerate(video_ids):
    out = widgets.Output()
    with out:
      if video_ids[i][0] == 'Youtube':
        video = YouTubeVideo(id=video_ids[i][1], width=W,
                             height=H, fs=fs, rel=0)
        print(f'Video available at https://youtube.com/watch?v={video.id}')
      else:
        video = PlayVideo(id=video_ids[i][1], source=video_ids[i][0], width=W,
                          height=H, fs=fs, autoplay=False)
        if video_ids[i][0] == 'Bilibili':
          print(f'Video available at https://www.bilibili.com/video/{video.id}')
        elif video_ids[i][0] == 'Osf':
          print(f'Video available at https://osf.io/{video.id}')
      display(video)
    tab_contents.append(out)
  return tab_contents


video_ids = [('Youtube', 'T1uGf1H31wE'), ('Bilibili', 'BV1bh411o73r')]
tab_contents = display_videos(video_ids, W=854, H=480)
tabs = widgets.Tab()
tabs.children = tab_contents
for i in range(len(tab_contents)):
  tabs.set_title(i, video_ids[i][0])
display(tabs)

# @title Submit your feedback
content_review(f"{feedback_prefix}_Summary")

このチュートリアルで学んだこと：

因果性推定に回帰を使う方法
省略変数バイアスの問題と、それが実際にどのように生じるか