Coverage for ibllib/ephys/ephysqc.py: 68%

1"""

2Quality control of raw Neuropixel electrophysiology data.

3"""

4from pathlib import Path (empty)

5import logging (empty)

6import shutil (empty)

8import numpy as np (empty)

9import pandas as pd (empty)

10from scipy import signal, stats (empty)

11import one.alf.io as alfio (empty)

12from iblutil.util import Bunch (empty)

13import spikeglx (empty)

14import neuropixel (empty)

15from neurodsp import fourier, utils, voltage (empty)

16from tqdm import tqdm (empty)

18from brainbox.io.spikeglx import Streamer (empty)

19from brainbox.metrics.single_units import spike_sorting_metrics (empty)

20from ibllib.ephys import sync_probes, spikes (empty)

21from ibllib.qc import base (empty)

22from ibllib.io.extractors import ephys_fpga, training_wheel (empty)

23from phylib.io import model (empty)

26_logger = logging.getLogger(__name__) (empty)

28RMS_WIN_LENGTH_SECS = 3 (empty)

29WELCH_WIN_LENGTH_SAMPLES = 1024 (empty)

30NCH_WAVEFORMS = 32 # number of channels to be saved in templates.waveforms and channels.waveforms (empty)

31BATCHES_SPACING = 300 (empty)

32TMIN = 40 (empty)

33SAMPLE_LENGTH = 1 (empty)

34SPIKE_THRESHOLD_UV = -50 # negative, the threshold used for spike detection on pre-processed raw data (empty)

37class EphysQC(base.QC): (empty)

38 """

39 A class for computing Ephys QC metrics.

41 :param probe_id: An existing and registered probe insertion ID.

42 :param one: An ONE instance pointing to the database the probe_id is registered with. Optional, will instantiate

43 default database if not given.

44 """

46 def __init__(self, probe_id, session_path=None, **kwargs): (empty)

47 self.use_alyx = kwargs.pop('use_alyx', True) 2 ctx1ba

48 self.stream = kwargs.pop('stream', True) 2 ctx1ba

50 if self.use_alyx: 2 ctx1ba

51 super().__init__(probe_id, endpoint='insertions', **kwargs) 2 ctx1ba

52 self._outcome = 'NOT_SET' 2 ctx1ba

53 self.pid = probe_id 2 ctx1ba

55 self.session_path = session_path 2 ctx1ba

56 keys = ('ap', 'ap_meta', 'lf', 'lf_meta') 2 ctx1ba

57 self.data = Bunch.fromkeys(keys) 2 ctx1ba

58 self.metrics = {} 2 ctx1ba

60 def _ensure_required_data(self): (empty)

61 """

62 Ensures the datasets required for QC are available locally or remotely.

63 """

64 assert self.one is not None, 'ONE instance is required to ensure required data' 2 ctx1ia

65 eid, pname = self.one.pid2eid(self.pid) 2 ctx1ia

66 if self.session_path is None: 2 ctx1ia

67 self.session_path = self.one.eid2path(eid) 1 ctx1i

68 self.probe_path = Path(self.session_path).joinpath('raw_ephys_data', pname) 2 ctx1ia

69 # Check if there is at least one meta file available

70 meta_files = list(self.probe_path.rglob('*.meta')) 2 ctx1ia

71 assert len(meta_files) != 0, f'No meta files in {self.probe_path}' 2 ctx1ia

72 # Check if there is no more than one meta file per type

73 ap_meta = [meta for meta in meta_files if 'ap.meta' in meta.name] 2 ctx1ia

74 assert not len(ap_meta) > 1, f'More than one ap.meta file in {self.probe_path}. Remove redundant files to run QC' 2 ctx1ia

75 lf_meta = [meta for meta in meta_files if 'lf.meta' in meta.name] 2 ctx1ia

76 assert not len(lf_meta) > 1, f'More than one lf.meta file in {self.probe_path}. Remove redundant files to run QC' 2 ctx1ia

78 def load_data(self, ensure=True) -> None: (empty)

79 """

80 Load any locally available data.

81 """

82 # First sanity check

83 if self.use_alyx: 1 ctx1a

84 self._ensure_required_data() 1 ctx1a

86 _logger.info('Gathering data for QC') 1 ctx1a

87 # Load metadata and, if locally present, bin file

88 for dstype in ['ap', 'lf']: 1 ctx1a

89 # We already checked that there is not more than one meta file per type

90 meta_file = next(self.probe_path.rglob(f'*{dstype}.meta'), None) 1 ctx1a

91 if meta_file is None: 1 ctx1a

92 _logger.warning(f'No {dstype}.meta file in {self.probe_path}, skipping QC for {dstype} data.')

93 else:

94 self.data[f'{dstype}_meta'] = spikeglx.read_meta_data(meta_file) 1 ctx1a

95 bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None) 1 ctx1a

96 if not bin_file: 1 ctx1a

97 # we only stream the AP file, we won't stream the full LF file...

98 if dstype == 'ap':

99 self.data[f'{dstype}'] = Streamer(pid=self.pid, one=self.one, remove_cached=True)

100 else:

101 self.data[f'{dstype}'] = None

102 else:

103 self.data[f'{dstype}'] = spikeglx.Reader(bin_file, open=True) 1 ctx1a

104

105 @staticmethod (empty)

106 def _compute_metrics_array(raw, fs, h): (empty)

107 """

108 From a numpy array, computes rms on raw data, destripes, computes rms on destriped data

109 and performs a simple spike detection

110 :param raw: voltage numpy.array(ntraces, nsamples)

111 :param fs: sampling frequency (Hz)

112 :param h: dictionary containing sensor coordinates, see neuropixel.trace_header

113 :return: 3 numpy vectors nchannels length

114 """

115 destripe = voltage.destripe(raw, fs=fs, h=h) 1 ctx1a

116 rms_raw = utils.rms(raw) 1 ctx1a

117 rms_pre_proc = utils.rms(destripe) 1 ctx1a

118 detections = spikes.detection(data=destripe.T, fs=fs, h=h, detect_threshold=SPIKE_THRESHOLD_UV * 1e-6) 1 ctx1a

119 spike_rate = np.bincount(detections.trace, minlength=raw.shape[0]).astype(np.float32) 1 ctx1a

120 channel_labels, _ = voltage.detect_bad_channels(raw, fs=fs) 1 ctx1a

121 _, psd = signal.welch(destripe, fs=fs, window='hann', nperseg=WELCH_WIN_LENGTH_SAMPLES, 1 ctx1a

122 detrend='constant', return_onesided=True, scaling='density', axis=-1)

123 return rms_raw, rms_pre_proc, spike_rate, channel_labels, psd 1 ctx1a

124

125 def run(self, update: bool = False, overwrite: bool = True, stream: bool = None, **kwargs) -> (str, dict): (empty)

126 """

127 Run QC on samples of the .ap file, and on the entire file for .lf data if it is present.

128

129 :param update: bool, whether to update the qc json fields for this probe. Default is False.

130 :param overwrite: bool, whether to overwrite locally existing outputs of this function. Default is False.

131 :param stream: bool, whether to stream the samples of the .ap data if not locally available. Defaults to value

132 set in class init (True if none set).

133 :return: A list of QC output files. In case of a complete run that is one file for .ap and three files for .lf.

134 """

135 # If stream is explicitly given in run, overwrite value from init

136 if stream is not None: 1 ctx1a

137 self.stream = stream

138

139 # Load data

140 self.load_data() 1 ctx1a

141 self.out_path = kwargs.get('out_path', self.probe_path) 1 ctx1a

142

143 qc_files = [] 1 ctx1a

144 # If ap meta file present, calculate median RMS per channel before and after destriping

145 # NB: ideally this should go a a separate function once we have a spikeglx.Streamer that behaves like the Reader

146 if self.data.ap_meta: 1 ctx1a

147 files = {'rms': self.out_path.joinpath("_iblqc_ephysChannels.apRMS.npy"), 1 ctx1a

148 'spike_rate': self.out_path.joinpath("_iblqc_ephysChannels.rawSpikeRates.npy"),

149 'channel_labels': self.out_path.joinpath("_iblqc_ephysChannels.labels.npy"),

150 'ap_freqs': self.out_path.joinpath("_iblqc_ephysSpectralDensityAP.freqs.npy"),

151 'ap_power': self.out_path.joinpath("_iblqc_ephysSpectralDensityAP.power.npy"),

152 }

153 if all([files[k].exists() for k in files]) and not overwrite: 1 ctx1a

154 _logger.warning(f'RMS map already exists for .ap data in {self.probe_path}, skipping. '

155 f'Use overwrite option.')

156 results = {k: np.load(files[k]) for k in files}

157 else:

158 sr = self.data['ap'] 1 ctx1a

159 nc = sr.nc - sr.nsync 1 ctx1a

160

161 # verify that the channel layout is correct according to IBL layout

162 th = sr.geometry 1 ctx1a

163 if sr.meta.get('NP2.4_shank', None) is not None: 1 ctx1a

164 h = neuropixel.trace_header(sr.major_version, nshank=4)

165 h = neuropixel.split_trace_header(h, shank=int(sr.meta.get('NP2.4_shank')))

166 else:

167 h = neuropixel.trace_header(sr.major_version, nshank=np.unique(th['shank']).size) 1 ctx1a

168

169 if not (np.all(h['x'] == th['x']) and np.all(h['y'] == th['y'])): 1 ctx1a

170 _logger.critical("Channel geometry seems incorrect")

171 # raise ValueError("Wrong Neuropixel channel mapping used - ABORT")

172

173 t0s = np.arange(TMIN, sr.rl - SAMPLE_LENGTH, BATCHES_SPACING) 1 ctx1a

174 all_rms = np.zeros((2, nc, t0s.shape[0])) 1 ctx1a

175 all_srs, channel_ok = (np.zeros((nc, t0s.shape[0])) for _ in range(2)) 1 ctx1a

176 psds = np.zeros((nc, fourier.fscale(WELCH_WIN_LENGTH_SAMPLES, 1, one_sided=True).size)) 1 ctx1a

177

178 _logger.info(f'Computing RMS samples for .ap data {self.probe_path}') 1 ctx1a

179 for i, t0 in enumerate(t0s): 1 ctx1a

180 sl = slice(int(t0 * sr.fs), int((t0 + SAMPLE_LENGTH) * sr.fs)) 1 ctx1a

181 raw = sr[sl, :-sr.nsync].T 1 ctx1a

182 all_rms[0, :, i], all_rms[1, :, i], all_srs[:, i], channel_ok[:, i], psd =\ 1 ctx1a

183 self._compute_metrics_array(raw, sr.fs, h)

184 psds += psd 1 ctx1a

185 # Calculate the median RMS across all samples per channel

186 results = {'rms': np.median(all_rms, axis=-1), 1 ctx1a

187 'spike_rate': np.median(all_srs, axis=-1),

188 'channel_labels': stats.mode(channel_ok, axis=1)[0],

189 'ap_freqs': fourier.fscale(WELCH_WIN_LENGTH_SAMPLES, 1 / sr.fs, one_sided=True),

190 'ap_power': psds.T / len(t0s), # shape: (nfreqs, nchannels)

191 }

192 for k in files: 1 ctx1a

193 np.save(files[k], results[k]) 1 ctx1a

194 qc_files.extend([files[k] for k in files]) 1 ctx1a

195 for p in [10, 90]: 1 ctx1a

196 self.metrics[f'apRms_p{p}_raw'] = np.format_float_scientific( 1 ctx1a

197 np.percentile(results['rms'][0, :], p), precision=2)

198 self.metrics[f'apRms_p{p}_proc'] = np.format_float_scientific( 1 ctx1a

199 np.percentile(results['rms'][1, :], p), precision=2)

200 if update: 1 ctx1a

201 self.update_extended_qc(self.metrics) 1 ctx1a

202 # If lf meta and bin file present, run the old qc on LF data

203 if self.data.lf_meta and self.data.lf: 1 ctx1a

204 qc_files.extend(extract_rmsmap(self.data.lf, out_folder=self.out_path, overwrite=overwrite)) 1 ctx1a

205

206 return qc_files 1 ctx1a

207

208

209def rmsmap(sglx): (empty)

210 """

211 Computes RMS map in time domain and spectra for each channel of Neuropixel probe

212

213 :param sglx: Open spikeglx reader

214 :return: a dictionary with amplitudes in channeltime space, channelfrequency space, time

215 and frequency scales

216 """

217 rms_win_length_samples = 2 ** np.ceil(np.log2(sglx.fs * RMS_WIN_LENGTH_SECS))

218 # the window generator will generates window indices

219 wingen = utils.WindowGenerator(ns=sglx.ns, nswin=rms_win_length_samples, overlap=0)

220 # pre-allocate output dictionary of numpy arrays

221 win = {'TRMS': np.zeros((wingen.nwin, sglx.nc)),

222 'nsamples': np.zeros((wingen.nwin,)),

223 'fscale': fourier.fscale(WELCH_WIN_LENGTH_SAMPLES, 1 / sglx.fs, one_sided=True),

224 'tscale': wingen.tscale(fs=sglx.fs)}

225 win['spectral_density'] = np.zeros((len(win['fscale']), sglx.nc))

226 # loop through the whole session

227 with tqdm(total=wingen.nwin) as pbar:

228 for first, last in wingen.firstlast:

229 D = sglx.read_samples(first_sample=first, last_sample=last)[0].transpose()

230 # remove low frequency noise below 1 Hz

231 D = fourier.hp(D, 1 / sglx.fs, [0, 1])

232 iw = wingen.iw

233 win['TRMS'][iw, :] = utils.rms(D)

234 win['nsamples'][iw] = D.shape[1]

235 # the last window may be smaller than what is needed for welch

236 if last - first < WELCH_WIN_LENGTH_SAMPLES:

237 continue

238 # compute a smoothed spectrum using welch method

239 _, w = signal.welch(

240 D, fs=sglx.fs, window='hann', nperseg=WELCH_WIN_LENGTH_SAMPLES,

241 detrend='constant', return_onesided=True, scaling='density', axis=-1

242 )

243 win['spectral_density'] += w.T

244 # print at least every 20 windows

245 if (iw % min(20, max(int(np.floor(wingen.nwin / 75)), 1))) == 0:

246 pbar.update(iw)

247 sglx.close()

248 return win

249

250

251def extract_rmsmap(sglx, out_folder=None, overwrite=False): (empty)

252 """

253 Wrapper for rmsmap that outputs _ibl_ephysRmsMap and _ibl_ephysSpectra ALF files

254

255 :param sglx: Open spikeglx Reader with data for which to compute rmsmap

256 :param out_folder: folder in which to store output ALF files. Default uses the folder in which

257 the `fbin` file lives.

258 :param overwrite: do not re-extract if all ALF files already exist

259 :param label: string or list of strings that will be appended to the filename before extension

260 :return: None

261 """

262 if out_folder is None: 1 ctx1a

263 out_folder = sglx.file_bin.parent

264 else:

265 out_folder = Path(out_folder) 1 ctx1a

266 _logger.info(f"Computing RMS map for .{sglx.type} data in {out_folder}") 1 ctx1a

267 alf_object_time = f'ephysTimeRms{sglx.type.upper()}' 1 ctx1a

268 alf_object_freq = f'ephysSpectralDensity{sglx.type.upper()}' 1 ctx1a

269 files_time = list(out_folder.glob(f"_iblqc_{alf_object_time}*")) 1 ctx1a

270 files_freq = list(out_folder.glob(f"_iblqc_{alf_object_freq}*")) 1 ctx1a

271 if (len(files_time) == 2 == len(files_freq)) and not overwrite: 1 ctx1a

272 _logger.warning(f'RMS map already exists for .{sglx.type} data in {out_folder}, skipping. Use overwrite option.') 1 ctx1a

273 return files_time + files_freq 1 ctx1a

274 # crunch numbers

275 rms = rmsmap(sglx)

276 # output ALF files, single precision with the optional label as suffix before extension

277 if not out_folder.exists():

278 out_folder.mkdir()

279 tdict = {'rms': rms['TRMS'].astype(np.single), 'timestamps': rms['tscale'].astype(np.single)}

280 fdict = {'power': rms['spectral_density'].astype(np.single),

281 'freqs': rms['fscale'].astype(np.single)}

282 out_time = alfio.save_object_npy(

283 out_folder, object=alf_object_time, dico=tdict, namespace='iblqc')

284 out_freq = alfio.save_object_npy(

285 out_folder, object=alf_object_freq, dico=fdict, namespace='iblqc')

286 return out_time + out_freq

287

288

289def raw_qc_session(session_path, overwrite=False): (empty)

290 """

291 Wrapper that exectutes QC from a session folder and outputs the results whithin the same folder

292 as the original raw data.

293 :param session_path: path of the session (Subject/yyyy-mm-dd/number

294 :param overwrite: bool (False) Force means overwriting an existing QC file

295 :return: None

296 """

297 efiles = spikeglx.glob_ephys_files(session_path)

298 qc_files = []

299 for efile in efiles:

300 if efile.get('ap') and efile.ap.exists():

301 qc_files.extend(extract_rmsmap(efile.ap, out_folder=None, overwrite=overwrite))

302 if efile.get('lf') and efile.lf.exists():

303 qc_files.extend(extract_rmsmap(efile.lf, out_folder=None, overwrite=overwrite))

304 return qc_files

305

306

307def validate_ttl_test(ses_path, display=False): (empty)

308 """

309 For a mock session on the Ephys Choice world task, check the sync channels for all

310 device properly connected and perform a synchronization if dual probes to check that

311 all channels are recorded properly

312 :param ses_path: session path

313 :param display: show the probe synchronization plot if several probes

314 :return: True if tests pass, errors otherwise

315 """

316

317 def _single_test(assertion, str_ok, str_ko): 6 ctx1jefgcd

318 if assertion: 5 ctx1efgcd

319 _logger.info(str_ok) 5 ctx1efgcd

320 return True 5 ctx1efgcd

321 else:

322 _logger.error(str_ko)

323 return False

324

325 EXPECTED_RATES_HZ = {'left_camera': 60, 'right_camera': 150, 'body_camera': 30} 6 ctx1jefgcd

326 SYNC_RATE_HZ = 1 6 ctx1jefgcd

327 MIN_TRIALS_NB = 6 6 ctx1jefgcd

328

329 ok = True 6 ctx1jefgcd

330 ses_path = Path(ses_path) 6 ctx1jefgcd

331 if not ses_path.exists(): 6 ctx1jefgcd

332 return False

333

334 # get the synchronization fronts (from the raw binary if necessary)

335 ephys_fpga.extract_sync(session_path=ses_path, overwrite=False) 6 ctx1jefgcd

336 rawsync, sync_map = ephys_fpga.get_main_probe_sync(ses_path) 6 ctx1jefgcd

337 last_time = rawsync['times'][-1] 5 ctx1efgcd

338

339 # get upgoing fronts for each

340 sync = Bunch({}) 5 ctx1efgcd

341 for k in sync_map: 5 ctx1efgcd

342 fronts = ephys_fpga.get_sync_fronts(rawsync, sync_map[k]) 5 ctx1efgcd

343 sync[k] = fronts['times'][fronts['polarities'] == 1] 5 ctx1efgcd

344 wheel = ephys_fpga.extract_wheel_sync(rawsync, chmap=sync_map) 5 ctx1efgcd

345

346 frame_rates = {'right_camera': np.round(1 / np.median(np.diff(sync.right_camera))), 5 ctx1efgcd

347 'left_camera': np.round(1 / np.median(np.diff(sync.left_camera))),

348 'body_camera': np.round(1 / np.median(np.diff(sync.body_camera)))}

349

350 # check the camera frame rates

351 for lab in frame_rates: 5 ctx1efgcd

352 expect = EXPECTED_RATES_HZ[lab] 5 ctx1efgcd

353 ok &= _single_test(assertion=abs((1 - frame_rates[lab] / expect)) < 0.1, 5 ctx1efgcd

354 str_ok=f'PASS: {lab} frame rate: {frame_rates[lab]} = {expect} Hz',

355 str_ko=f'FAILED: {lab} frame rate: {frame_rates[lab]} != {expect} Hz')

356

357 # check that the wheel has a minimum rate of activity on both channels

358 re_test = abs(1 - sync.rotary_encoder_1.size / sync.rotary_encoder_0.size) < 0.1 5 ctx1efgcd

359 re_test &= len(wheel[1]) / last_time > 5 5 ctx1efgcd

360 ok &= _single_test(assertion=re_test, 5 ctx1efgcd

361 str_ok="PASS: Rotary encoder", str_ko="FAILED: Rotary encoder")

362 # check that the frame 2 ttls has a minimum rate of activity

363 ok &= _single_test(assertion=len(sync.frame2ttl) / last_time > 0.2, 5 ctx1efgcd

364 str_ok="PASS: Frame2TTL", str_ko="FAILED: Frame2TTL")

365 # the audio has to have at least one event per trial

366 ok &= _single_test(assertion=len(sync.bpod) > len(sync.audio) > MIN_TRIALS_NB, 5 ctx1efgcd

367 str_ok="PASS: audio", str_ko="FAILED: audio")

368 # the bpod has to have at least twice the amount of min trial pulses

369 ok &= _single_test(assertion=len(sync.bpod) > MIN_TRIALS_NB * 2, 5 ctx1efgcd

370 str_ok="PASS: Bpod", str_ko="FAILED: Bpod")

371 try: 5 ctx1efgcd

372 # note: tried to depend as little as possible on the extraction code but for the valve...

373 bpod = ephys_fpga.get_sync_fronts(rawsync, sync_map['bpod']) 5 ctx1efgcd

374 _, t_valve_open, _ = ephys_fpga._assign_events_bpod(bpod['times'], bpod['polarities']) 5 ctx1efgcd

375 res = t_valve_open.size > 1 5 ctx1efgcd

376 except AssertionError:

377 res = False

378 # check that the reward valve is actionned at least once

379 ok &= _single_test(assertion=res, 5 ctx1efgcd

380 str_ok="PASS: Valve open", str_ko="FAILED: Valve open not detected")

381 _logger.info('ALL CHECKS PASSED !') 5 ctx1efgcd

382

383 # the imec sync is for 3B Probes only

384 if sync.get('imec_sync') is not None: 5 ctx1efgcd

385 ok &= _single_test(assertion=np.all(1 - SYNC_RATE_HZ * np.diff(sync.imec_sync) < 0.1), 2 ctx1cd

386 str_ok="PASS: imec sync", str_ko="FAILED: imec sync")

387

388 # second step is to test that we can make the sync. Assertions are whithin the synch code

389 if sync.get('imec_sync') is not None: 5 ctx1efgcd

390 sync_result, _ = sync_probes.version3B(ses_path, display=display) 2 ctx1cd

391 else:

392 sync_result, _ = sync_probes.version3A(ses_path, display=display) 3 ctx1efg

393

394 ok &= _single_test(assertion=sync_result, str_ok="PASS: synchronisation", 5 ctx1efgcd

395 str_ko="FAILED: probe synchronizations threshold exceeded")

396

397 if not ok: 5 ctx1efgcd

398 raise ValueError('FAILED TTL test')

399 return ok 5 ctx1efgcd

400

401

402def spike_sorting_metrics_ks2(ks2_path=None, m=None, save=True, save_path=None): (empty)

403 """

404 Given a path containing kilosort 2 output, compute quality metrics and optionally save them

405 to a clusters_metric.csv file

406 :param ks2_path:

407 :param save

408 :param save_path: If not given will save into the path given as ks2_path

409 :return:

410 """

411

412 save_path = save_path or ks2_path

413

414 # ensure that either a ks2_path or a phylib `TemplateModel` object with unit info is given

415 assert not (ks2_path is None and m is None), 'Must either specify a path to a ks2 output ' \

416 'directory, or a phylib `TemplateModel` object'

417 # create phylib `TemplateModel` if not given

418 m = phy_model_from_ks2_path(ks2_path) if None else m

419 c, drift = spike_sorting_metrics(m.spike_times, m.spike_clusters, m.amplitudes, m.depths,

420 cluster_ids=np.arange(m.clusters_channels.size))

421 # include the ks2 cluster contamination if `cluster_ContamPct` file exists

422 file_contamination = ks2_path.joinpath('cluster_ContamPct.tsv')

423 if file_contamination.exists():

424 contam = pd.read_csv(file_contamination, sep='\t')

425 contam.rename(columns={'ContamPct': 'ks2_contamination_pct'}, inplace=True)

426 c = c.set_index('cluster_id', drop=False).join(contam.set_index('cluster_id'))

427

428 # include the ks2 cluster labels if `cluster_KSLabel` file exists

429 file_labels = ks2_path.joinpath('cluster_KSLabel.tsv')

430 if file_labels.exists():

431 ks2_labels = pd.read_csv(file_labels, sep='\t')

432 ks2_labels.rename(columns={'KSLabel': 'ks2_label'}, inplace=True)

433 c = c.set_index('cluster_id', drop=False).join(ks2_labels.set_index('cluster_id'))

434

435 if save:

436 Path(save_path).mkdir(exist_ok=True, parents=True)

437 # the file name contains the label of the probe (directory name in this case)

438 c.to_csv(Path(save_path).joinpath('cluster_metrics.csv'))

439

440 return c

441

442

443def phy_model_from_ks2_path(ks2_path, bin_path, bin_file=None): (empty)

444 if not bin_file: 1 ctx1a

445 bin_file = next(bin_path.rglob('*.ap.*bin'), None)

446 meta_file = next(bin_path.rglob('*.ap.meta'), None) 1 ctx1a

447 if meta_file and meta_file.exists(): 1 ctx1a

448 meta = spikeglx.read_meta_data(meta_file) 1 ctx1a

449 fs = spikeglx._get_fs_from_meta(meta) 1 ctx1a

450 nch = (spikeglx._get_nchannels_from_meta(meta) - 1 ctx1a

451 len(spikeglx._get_sync_trace_indices_from_meta(meta)))

452 else:

453 fs = 30000

454 nch = 384

455 m = model.TemplateModel(dir_path=ks2_path, 1 ctx1a

456 dat_path=bin_file, # this assumes the raw data is in the same folder

457 sample_rate=fs,

458 n_channels_dat=nch,

459 n_closest_channels=NCH_WAVEFORMS)

460 m.depths = m.get_depths() 1 ctx1a

461 return m 1 ctx1a

462

463

464# Make a bunch gathering all trial QC

465def qc_fpga_task(fpga_trials, alf_trials): (empty)

466 """

467 :fpga_task is the dictionary output of

468 ibllib.io.extractors.ephys_fpga.extract_behaviour_sync

469 : bpod_trials is the dictionary output of ibllib.io.extractors.ephys_trials.extract_all

470 : alf_trials is the ALF _ibl_trials object after extraction (alfio.load_object)

471 :return: qc_session, qc_trials, True means QC passes while False indicates a failure

472 """

473

474 GOCUE_STIMON_DELAY = 0.01 # -> 0.1 1 ctx1h

475 FEEDBACK_STIMFREEZE_DELAY = 0.01 # -> 0.1 1 ctx1h

476 VALVE_STIM_OFF_DELAY = 1 1 ctx1h

477 VALVE_STIM_OFF_JITTER = 0.1 1 ctx1h

478 ITI_IN_STIM_OFF_JITTER = 0.1 1 ctx1h

479 ERROR_STIM_OFF_DELAY = 2 1 ctx1h

480 ERROR_STIM_OFF_JITTER = 0.1 1 ctx1h

481 RESPONSE_FEEDBACK_DELAY = 0.0005 1 ctx1h

482

483 def strictly_after(t0, t1, threshold): 1 ctx1h

484 """ returns isafter, iswithinthreshold"""

485 return (t1 - t0) > 0, np.abs((t1 - t0)) <= threshold 1 ctx1h

486

487 ntrials = fpga_trials['stimOn_times'].size 1 ctx1h

488 qc_trials = Bunch({}) 1 ctx1h

489

490 """ 1 ctx1h

491 First Check consistency of the dataset: whithin each trial, all events happen after trial

492 start should not be NaNs and increasing. This is not a QC but an assertion.

493 """

494 status = True 1 ctx1h

495 for k in ['response_times', 'stimOn_times', 'response_times', 1 ctx1h

496 'goCueTrigger_times', 'goCue_times', 'feedback_times']:

497 if k.endswith('_bpod'): 1 ctx1h

498 tstart = alf_trials['intervals_bpod'][:, 0]

499 else:

500 tstart = alf_trials['intervals'][:, 0] 1 ctx1h

501 selection = ~np.isnan(alf_trials[k]) 1 ctx1h

502 status &= np.all(alf_trials[k][selection] - tstart[selection] > 0) 1 ctx1h

503 status &= np.all(np.diff(alf_trials[k][selection]) > 0) 1 ctx1h

504 assert status 1 ctx1h

505

506 """ 1 ctx1h

507 This part of the function uses only fpga_trials information

508 """

509 # check number of feedbacks: should always be one

510 qc_trials['n_feedback'] = (np.uint32(~np.isnan(fpga_trials['valveOpen_times'])) + 1 ctx1h

511 np.uint32(~np.isnan(fpga_trials['errorCue_times'])))

512

513 # check for non-Nans

514 qc_trials['stimOn_times_nan'] = ~np.isnan(fpga_trials['stimOn_times']) 1 ctx1h

515 qc_trials['goCue_times_nan'] = ~np.isnan(fpga_trials['goCue_times']) 1 ctx1h

516

517 # stimOn before goCue

518 qc_trials['stimOn_times_before_goCue_times'], qc_trials['stimOn_times_goCue_times_delay'] =\ 1 ctx1h

519 strictly_after(fpga_trials['stimOn_times'], fpga_trials['goCue_times'], GOCUE_STIMON_DELAY)

520

521 # stimFreeze before feedback

522 qc_trials['stim_freeze_before_feedback'], qc_trials['stim_freeze_feedback_delay'] = \ 1 ctx1h

523 strictly_after(fpga_trials['stimFreeze_times'], fpga_trials['feedback_times'],

524 FEEDBACK_STIMFREEZE_DELAY)

525

526 # stimOff 1 sec after valve, with 0.1 as acceptable jitter

527 qc_trials['stimOff_delay_valve'] = np.less( 1 ctx1h

528 np.abs(

529 fpga_trials['stimOff_times'] - fpga_trials['valveOpen_times'] - VALVE_STIM_OFF_DELAY

530 ),

531 VALVE_STIM_OFF_JITTER, out=np.ones(ntrials, dtype=bool),

532 where=~np.isnan(fpga_trials['valveOpen_times']))

533

534 # iti_in whithin 0.01 sec of stimOff

535 qc_trials['iti_in_delay_stim_off'] = \ 1 ctx1h

536 np.abs(fpga_trials['stimOff_times'] - fpga_trials['itiIn_times']) < ITI_IN_STIM_OFF_JITTER

537

538 # stimOff 2 secs after errorCue_times with jitter

539 # noise off happens 2 secs after stimm, with 0.1 as acceptable jitter

540 qc_trials['stimOff_delay_noise'] = np.less( 1 ctx1h

541 np.abs(

542 fpga_trials['stimOff_times'] - fpga_trials['errorCue_times'] - ERROR_STIM_OFF_DELAY

543 ),

544 ERROR_STIM_OFF_JITTER, out=np.ones(ntrials, dtype=bool),

545 where=~np.isnan(fpga_trials['errorCue_times']))

546

547 """ 1 ctx1h

548 This part uses only alf_trials information

549 """

550 # TEST Response times (from session start) should be increasing continuously

551 # Note: RT are not durations but time stamps from session start

552 # 1. check for non-Nans

553 qc_trials['response_times_nan'] = ~np.isnan(alf_trials['response_times']) 1 ctx1h

554 # 2. check for positive increase

555 qc_trials['response_times_increase'] = \ 1 ctx1h

556 np.diff(np.append([0], alf_trials['response_times'])) > 0

557 # TEST Response times (from goCue) should be positive

558 qc_trials['response_times_goCue_times_diff'] = \ 1 ctx1h

559 alf_trials['response_times'] - alf_trials['goCue_times'] > 0

560 # TEST 1. Response_times should be before feedback

561 qc_trials['response_before_feedback'] = \ 1 ctx1h

562 alf_trials['feedback_times'] - alf_trials['response_times'] > 0

563 # 2. Delay between wheel reaches threshold (response time) and

564 # feedback is 100us, acceptable jitter 500 us

565 qc_trials['response_feedback_delay'] = \ 1 ctx1h

566 alf_trials['feedback_times'] - alf_trials['response_times'] < RESPONSE_FEEDBACK_DELAY

567

568 # Test output at session level

569 qc_session = {k: np.all(qc_trials[k]) for k in qc_trials} 1 ctx1h

570

571 return qc_session, qc_trials 1 ctx1h

572

573

574def _qc_from_path(sess_path, display=True): (empty)

575 WHEEL = False

576 sess_path = Path(sess_path)

577 temp_alf_folder = sess_path.joinpath('fpga_test', 'alf')

578 temp_alf_folder.mkdir(parents=True, exist_ok=True)

579

580 sync, chmap = ephys_fpga.get_main_probe_sync(sess_path, bin_exists=False)

581 _ = ephys_fpga.extract_all(sess_path, output_path=temp_alf_folder, save=True)

582 # check that the output is complete

583 fpga_trials = ephys_fpga.extract_behaviour_sync(sync, chmap=chmap, display=display)

584 # align with the bpod

585 bpod2fpga = ephys_fpga.align_with_bpod(temp_alf_folder.parent)

586 alf_trials = alfio.load_object(temp_alf_folder, 'trials')

587 shutil.rmtree(temp_alf_folder)

588 # do the QC

589 qcs, qct = qc_fpga_task(fpga_trials, alf_trials)

590

591 # do the wheel part

592 if WHEEL:

593 bpod_wheel = training_wheel.get_wheel_data(sess_path, save=False)

594 fpga_wheel = ephys_fpga.extract_wheel_sync(sync, chmap=chmap, save=False)

595

596 if display:

597 import matplotlib.pyplot as plt

598 t0 = max(np.min(bpod2fpga(bpod_wheel['re_ts'])), np.min(fpga_wheel['re_ts']))

599 dy = np.interp(t0, fpga_wheel['re_ts'], fpga_wheel['re_pos']) - np.interp(

600 t0, bpod2fpga(bpod_wheel['re_ts']), bpod_wheel['re_pos'])

601

602 fix, axes = plt.subplots(nrows=2, sharex='all', sharey='all')

603 # axes[0].plot(t, pos), axes[0].title.set_text('Extracted')

604 axes[0].plot(bpod2fpga(bpod_wheel['re_ts']), bpod_wheel['re_pos'] + dy)

605 axes[0].plot(fpga_wheel['re_ts'], fpga_wheel['re_pos'])

606 axes[0].title.set_text('FPGA')

607 axes[1].plot(bpod2fpga(bpod_wheel['re_ts']), bpod_wheel['re_pos'] + dy)

608 axes[1].title.set_text('Bpod')

609

610 return alfio.dataframe({**fpga_trials, **alf_trials, **qct})