Coverage for ibllib/pipes/mesoscope_tasks.py: 83%

554 statements  

« prev     ^ index     » next       coverage.py v7.5.4, created at 2024-07-08 17:16 +0100

1"""The mesoscope data extraction pipeline. 

2 

3The mesoscope pipeline currently comprises raw image registration and timestamps extraction. In 

4the future there will be compression (and potential cropping), FOV metadata extraction, and ROI 

5extraction. 

6 

7Pipeline: 

8 1. Register reference images and upload snapshots and notes to Alyx 

9 2. Run ROI cell detection 

10 3. Calculate the pixel and ROI brain locations and register fields of view to Alyx 

11 4. Compress the raw imaging data 

12 5. Extract the imaging times from the main DAQ 

13""" 

14import json 

15import logging 

16import subprocess 

17import shutil 

18import uuid 

19from pathlib import Path 

20from itertools import chain 

21from collections import defaultdict, Counter 

22from fnmatch import fnmatch 

23import enum 

24import re 

25import time 

26 

27import numba as nb 

28import numpy as np 

29import pandas as pd 

30import sparse 

31from scipy.io import loadmat 

32from scipy.interpolate import interpn 

33import one.alf.io as alfio 

34from one.alf.spec import is_valid, to_alf 

35from one.alf.files import filename_parts, session_path_parts 

36import one.alf.exceptions as alferr 

37 

38from ibllib.pipes import base_tasks 

39from ibllib.io.extractors import mesoscope 

40from iblatlas.atlas import ALLEN_CCF_LANDMARKS_MLAPDV_UM, MRITorontoAtlas 

41 

42 

43_logger = logging.getLogger(__name__) 

44Provenance = enum.Enum('Provenance', ['ESTIMATE', 'FUNCTIONAL', 'LANDMARK', 'HISTOLOGY']) # py3.11 make StrEnum 

45 

46 

47class MesoscopeRegisterSnapshots(base_tasks.MesoscopeTask, base_tasks.RegisterRawDataTask): 

48 """Upload snapshots as Alyx notes and register the 2P reference image(s).""" 

49 priority = 100 

50 job_size = 'small' 

51 

52 @property 

53 def signature(self): 

54 signature = { 1pm

55 'input_files': [('referenceImage.raw.tif', f'{self.device_collection}/reference', False), 

56 ('referenceImage.stack.tif', f'{self.device_collection}/reference', False), 

57 ('referenceImage.meta.json', f'{self.device_collection}/reference', False)], 

58 'output_files': [('referenceImage.raw.tif', f'{self.device_collection}/reference', False), 

59 ('referenceImage.stack.tif', f'{self.device_collection}/reference', False), 

60 ('referenceImage.meta.json', f'{self.device_collection}/reference', False)] 

61 } 

62 return signature 1pm

63 

64 def __init__(self, session_path, **kwargs): 

65 super().__init__(session_path, **kwargs) 1qpm

66 self.device_collection = self.get_device_collection('mesoscope', 1qpm

67 kwargs.get('device_collection', 'raw_imaging_data_*')) 

68 

69 def _run(self): 

70 """ 

71 Assert one reference image per collection and rename it. Register snapshots. 

72 

73 Returns 

74 ------- 

75 list of pathlib.Path containing renamed reference image. 

76 """ 

77 # Assert that only one tif file exists per collection 

78 file, collection, _ = self.signature['input_files'][0] 1m

79 reference_images = list(self.session_path.rglob(f'{collection}/{file}')) 1m

80 assert len(set(x.parent for x in reference_images)) == len(reference_images) 1m

81 # Rename the reference images 

82 out_files = super()._run() 1m

83 # Register snapshots in base session folder and raw_imaging_data folders 

84 self.register_snapshots(collection=[self.device_collection, '']) 1m

85 return out_files 1m

86 

87 

88class MesoscopeCompress(base_tasks.MesoscopeTask): 

89 """ Tar compress raw 2p tif files, optionally remove uncompressed data.""" 

90 

91 priority = 90 

92 job_size = 'large' 

93 _log_level = None 

94 

95 @property 

96 def signature(self): 

97 signature = { 1f

98 'input_files': [('*.tif', self.device_collection, True)], 

99 'output_files': [('imaging.frames.tar.bz2', self.device_collection, True)] 

100 } 

101 return signature 1f

102 

103 def setUp(self, **kwargs): 

104 """Run at higher log level""" 

105 self._log_level = _logger.level 1f

106 _logger.setLevel(logging.DEBUG) 1f

107 return super().setUp(**kwargs) 1f

108 

109 def tearDown(self): 

110 _logger.setLevel(self._log_level or logging.INFO) 1f

111 return super().tearDown() 1f

112 

113 def _run(self, remove_uncompressed=False, verify_output=True, clobber=False, **kwargs): 

114 """ 

115 Run tar compression on all tif files in the device collection. 

116 

117 Parameters 

118 ---------- 

119 remove_uncompressed: bool 

120 Whether to remove the original, uncompressed data. Default is False. 

121 verify_output: bool 

122 Whether to check that the compressed tar file can be uncompressed without errors. 

123 Default is True. 

124 

125 Returns 

126 ------- 

127 list of pathlib.Path 

128 Path to compressed tar file. 

129 """ 

130 outfiles = [] # should be one per raw_imaging_data folder 1f

131 input_files = defaultdict(list) 1f

132 for file, in_dir, _ in self.input_files: 1f

133 input_files[self.session_path.joinpath(in_dir)].append(file) 1f

134 

135 for in_dir, files in input_files.items(): 1f

136 outfile = in_dir / self.output_files[0][0] 1f

137 if outfile.exists() and not clobber: 1f

138 _logger.info('%s already exists; skipping...', outfile.relative_to(self.session_path)) 

139 continue 

140 # glob for all input patterns 

141 infiles = list(chain(*map(lambda x: in_dir.glob(x), files))) 1f

142 if not infiles: 1f

143 _logger.info('No image files found in %s', in_dir.relative_to(self.session_path)) 

144 continue 

145 

146 _logger.debug( 1f

147 'Input files:\n\t%s', '\n\t'.join(map(Path.as_posix, (x.relative_to(self.session_path) for x in infiles))) 

148 ) 

149 

150 uncompressed_size = sum(x.stat().st_size for x in infiles) 1f

151 _logger.info('Compressing %i file(s)', len(infiles)) 1f

152 cmd = 'tar -cjvf "{output}" "{input}"'.format( 1f

153 output=outfile.relative_to(in_dir), input='" "'.join(str(x.relative_to(in_dir)) for x in infiles)) 

154 _logger.debug(cmd) 1f

155 process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=in_dir) 1f

156 info, error = process.communicate() # b'2023-02-17_2_test_2P_00001_00001.tif\n' 1f

157 _logger.debug(info.decode()) 1f

158 assert process.returncode == 0, f'compression failed: {error.decode()}' 1f

159 

160 # Check the output 

161 assert outfile.exists(), 'output file missing' 1f

162 outfiles.append(outfile) 1f

163 compressed_size = outfile.stat().st_size 1f

164 min_size = kwargs.pop('verify_min_size', 1024) 1f

165 assert compressed_size > int(min_size), f'Compressed file < {min_size / 1024:.0f}KB' 1f

166 _logger.info('Compression ratio = %.3f, saving %.2f pct (%.2f MB)', 1f

167 uncompressed_size / compressed_size, 

168 round((1 - (compressed_size / uncompressed_size)) * 10000) / 100, 

169 (uncompressed_size - compressed_size) / 1024 / 1024) 

170 

171 if verify_output: 1f

172 # Test bzip 

173 cmd = f'bzip2 -tv {outfile.relative_to(in_dir)}' 1f

174 process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=in_dir) 1f

175 info, error = process.communicate() 1f

176 _logger.debug(info.decode()) 1f

177 assert process.returncode == 0, f'bzip compression test failed: {error}' 1f

178 # Check tar 

179 cmd = f'bunzip2 -dc {outfile.relative_to(in_dir)} | tar -tvf -' 1f

180 process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=in_dir) 1f

181 info, error = process.communicate() 1f

182 _logger.debug(info.decode()) 1f

183 assert process.returncode == 0, 'tarball decompression test failed' 1f

184 compressed_files = set(x.split()[-1] for x in filter(None, info.decode().split('\n'))) 1f

185 assert compressed_files == set(x.name for x in infiles) 1f

186 

187 if remove_uncompressed: 1f

188 _logger.info(f'Removing input files for {in_dir.relative_to(self.session_path)}') 1f

189 for file in infiles: 1f

190 file.unlink() 1f

191 

192 return outfiles 1f

193 

194 

195class MesoscopePreprocess(base_tasks.MesoscopeTask): 

196 """Run suite2p preprocessing on tif files.""" 

197 

198 priority = 80 

199 cpu = -1 

200 job_size = 'large' 

201 env = 'suite2p' 

202 

203 @property 

204 def signature(self): 

205 # The number of in and outputs will be dependent on the number of input raw imaging folders and output FOVs 

206 signature = { 1e

207 'input_files': [('_ibl_rawImagingData.meta.json', self.device_collection, True), 

208 ('*.tif', self.device_collection, True), 

209 ('exptQC.mat', self.device_collection, False)], 

210 'output_files': [('mpci.ROIActivityF.npy', 'alf/FOV*', True), 

211 ('mpci.ROINeuropilActivityF.npy', 'alf/FOV*', True), 

212 ('mpci.ROIActivityDeconvolved.npy', 'alf/FOV*', True), 

213 ('mpci.badFrames.npy', 'alf/FOV*', True), 

214 ('mpci.mpciFrameQC.npy', 'alf/FOV*', True), 

215 ('mpciFrameQC.names.tsv', 'alf/FOV*', True), 

216 ('mpciMeanImage.images.npy', 'alf/FOV*', True), 

217 ('mpciROIs.stackPos.npy', 'alf/FOV*', True), 

218 ('mpciROIs.mpciROITypes.npy', 'alf/FOV*', True), 

219 ('mpciROIs.cellClassifier.npy', 'alf/FOV*', True), 

220 ('mpciROIs.uuids.csv', 'alf/FOV*', True), 

221 ('mpciROITypes.names.tsv', 'alf/FOV*', True), 

222 ('mpciROIs.masks.npy', 'alf/FOV*', True), 

223 ('mpciROIs.neuropilMasks.npy', 'alf/FOV*', True), 

224 ('_suite2p_ROIData.raw.zip', self.device_collection, False)] 

225 } 

226 return signature 1e

227 

228 @staticmethod 

229 def _masks2sparse(stat, ops): 

230 """ 

231 Extract 3D sparse mask arrays from suit2p output. 

232 

233 Parameters 

234 ---------- 

235 stat : numpy.array 

236 The loaded stat.npy file. A structured array with fields ('lam', 'ypix', 'xpix', 'neuropil_mask'). 

237 ops : numpy.array 

238 The loaded ops.npy file. A structured array with fields ('Ly', 'Lx'). 

239 

240 Returns 

241 ------- 

242 sparse.GCXS 

243 A pydata sparse array of type float32, representing the ROI masks. 

244 sparse.GCXS 

245 A pydata sparse array of type float32, representing the neuropil ROI masks. 

246 

247 Notes 

248 ----- 

249 Save using sparse.save_npz. 

250 """ 

251 shape = (stat.shape[0], ops['Ly'], ops['Lx']) 1dc

252 npx = np.prod(shape[1:]) # Number of pixels per time point 1dc

253 coords = [[], [], []] 1dc

254 data = [] 1dc

255 pil_coords = [] 1dc

256 for i, s in enumerate(stat): 1dc

257 coords[0].append(np.full(s['ypix'].shape, i)) 1dc

258 coords[1].append(s['ypix']) 1dc

259 coords[2].append(s['xpix']) 1dc

260 data.append(s['lam']) 1dc

261 pil_coords.append(s['neuropil_mask'] + i * npx) 1dc

262 roi_mask_sp = sparse.COO(list(map(np.concatenate, coords)), np.concatenate(data), shape=shape) 1dc

263 pil_mask_sp = sparse.COO(np.unravel_index(np.concatenate(pil_coords), shape), True, shape=shape) 1dc

264 return sparse.GCXS.from_coo(roi_mask_sp), sparse.GCXS.from_coo(pil_mask_sp) 1dc

265 

266 def _rename_outputs(self, suite2p_dir, frameQC_names, frameQC, rename_dict=None): 

267 """ 

268 Convert suite2p output files to ALF datasets. 

269 

270 Parameters 

271 ---------- 

272 suite2p_dir : pathlib.Path 

273 rename_dict : dict or None 

274 The suite2p output filenames and the corresponding ALF name. NB: These files are saved 

275 after transposition. Default is None, i.e. using the default mapping hardcoded in the function below. 

276 

277 Returns 

278 ------- 

279 list of pathlib.Path 

280 All paths found in FOV folders. 

281 """ 

282 if rename_dict is None: 1dc

283 rename_dict = { 1dc

284 'F.npy': 'mpci.ROIActivityF.npy', 

285 'spks.npy': 'mpci.ROIActivityDeconvolved.npy', 

286 'Fneu.npy': 'mpci.ROINeuropilActivityF.npy' 

287 } 

288 # Rename the outputs, first the subdirectories 

289 for plane_dir in suite2p_dir.iterdir(): 1dc

290 # ignore the combined dir 

291 if plane_dir.name != 'combined': 1dc

292 n = int(plane_dir.name.split('plane')[1]) 1dc

293 fov_dir = plane_dir.parent.joinpath(f'FOV_{n:02}') 1dc

294 if fov_dir.exists(): 1dc

295 shutil.rmtree(str(fov_dir), ignore_errors=False, onerror=None) 

296 plane_dir.rename(fov_dir) 1dc

297 # Now rename the content of the new directories and move them out of suite2p 

298 for fov_dir in suite2p_dir.iterdir(): 1dc

299 # Compress suite2p output files 

300 target = suite2p_dir.parent.joinpath(fov_dir.name) 1dc

301 target.mkdir(exist_ok=True) 1dc

302 # Move bin file out of the way first 

303 if fov_dir.joinpath('data.bin').exists(): 1dc

304 dst = self.session_path.joinpath('raw_bin_files', fov_dir.name, 'data.bin') 

305 dst.parent.mkdir(parents=True, exist_ok=True) 

306 _logger.debug('Moving bin file to %s', dst.relative_to(self.session_path)) 

307 fov_dir.joinpath('data.bin').replace(dst) 

308 # Set logger to warning for the moment to not clutter the logs 

309 prev_level = _logger.level 1dc

310 _logger.setLevel(logging.WARNING) 1dc

311 shutil.make_archive(str(target / '_suite2p_ROIData.raw'), 'zip', fov_dir, logger=_logger) 1dc

312 _logger.setLevel(prev_level) 1dc

313 if fov_dir != 'combined': 1dc

314 # save frameQC in each dir (for now, maybe there will be fov specific frame QC eventually) 

315 if frameQC is not None and len(frameQC) > 0: 1dc

316 np.save(fov_dir.joinpath('mpci.mpciFrameQC.npy'), frameQC) 1c

317 frameQC_names.to_csv(fov_dir.joinpath('mpciFrameQC.names.tsv'), sep='\t', index=False) 1c

318 

319 # extract some other data from suite2p outputs 

320 ops = np.load(fov_dir.joinpath('ops.npy'), allow_pickle=True).item() 1dc

321 stat = np.load(fov_dir.joinpath('stat.npy'), allow_pickle=True) 1dc

322 iscell = np.load(fov_dir.joinpath('iscell.npy')) 1dc

323 

324 # Save suite2p ROI activity outputs in transposed from (n_frames, n_ROI) 

325 for k, v in rename_dict.items(): 1dc

326 np.save(fov_dir.joinpath(v), np.load(fov_dir.joinpath(k)).T) 1dc

327 # fov_dir.joinpath(k).unlink() # Keep original files for suite2P GUI 

328 np.save(fov_dir.joinpath('mpci.badFrames.npy'), np.asarray(ops['badframes'], dtype=bool)) 1dc

329 np.save(fov_dir.joinpath('mpciMeanImage.images.npy'), np.asarray(ops['meanImg'], dtype=float)) 1dc

330 np.save(fov_dir.joinpath('mpciROIs.stackPos.npy'), np.asarray([(*s['med'], 0) for s in stat], dtype=int)) 1dc

331 np.save(fov_dir.joinpath('mpciROIs.cellClassifier.npy'), np.asarray(iscell[:, 1], dtype=float)) 1dc

332 np.save(fov_dir.joinpath('mpciROIs.mpciROITypes.npy'), np.asarray(iscell[:, 0], dtype=np.int16)) 1dc

333 # clusters uuids 

334 uuid_list = ['uuids'] + list(map(str, [uuid.uuid4() for _ in range(len(iscell))])) 1dc

335 with open(fov_dir.joinpath('mpciROIs.uuids.csv'), 'w+') as fid: 1dc

336 fid.write('\n'.join(uuid_list)) 1dc

337 

338 pd.DataFrame([(0, 'no cell'), (1, 'cell')], columns=['roi_values', 'roi_labels'] 1dc

339 ).to_csv(fov_dir.joinpath('mpciROITypes.names.tsv'), sep='\t', index=False) 

340 # ROI and neuropil masks 

341 roi_mask, pil_mask = self._masks2sparse(stat, ops) 1dc

342 with open(fov_dir.joinpath('mpciROIs.masks.sparse_npz'), 'wb') as fp: 1dc

343 sparse.save_npz(fp, roi_mask) 1dc

344 with open(fov_dir.joinpath('mpciROIs.neuropilMasks.sparse_npz'), 'wb') as fp: 1dc

345 sparse.save_npz(fp, pil_mask) 1dc

346 # move folders out of suite2p dir 

347 # We overwrite existing files 

348 for file in filter(lambda x: is_valid(x.name), fov_dir.iterdir()): 1dc

349 target_file = target.joinpath(file.name) 1dc

350 if target_file.exists(): 1dc

351 target_file.unlink() 

352 file.rename(target_file) 1dc

353 shutil.rmtree(str(suite2p_dir), ignore_errors=False, onerror=None) 1dc

354 # Collect all files in those directories 

355 return list(suite2p_dir.parent.rglob('FOV*/*')) 1dc

356 

357 @staticmethod 

358 def _check_meta_data(meta_data_all: list) -> dict: 

359 """ 

360 Check that the meta data is consistent across all raw imaging folders. 

361 

362 Parameters 

363 ---------- 

364 meta_data_all: list of dicts 

365 List of metadata dictionaries to be checked for consistency. 

366 

367 Returns 

368 ------- 

369 dict 

370 Single, consolidated dictionary containing metadata. 

371 """ 

372 # Ignore the things we don't expect to match 

373 ignore = ('acquisitionStartTime', 'nFrames') 

374 ignore_sub = {'rawScanImageMeta': ('ImageDescription', 'Software')} 

375 

376 def equal_dicts(a, b, skip=None): 

377 ka = set(a).difference(skip or ()) 

378 kb = set(b).difference(skip or ()) 

379 return ka == kb and all(a[key] == b[key] for key in ka) 

380 

381 # Compare each dict with the first one in the list 

382 for i, meta in enumerate(meta_data_all[1:]): 

383 if meta != meta_data_all[0]: # compare entire object first 

384 for k, v in meta_data_all[0].items(): # check key by key 

385 if not (equal_dicts(v, meta[k], ignore_sub[k]) # compare sub-dicts... 

386 if k in ignore_sub else # ... if we have keys to ignore in test 

387 not (k in ignore or v == meta[k])): 

388 _logger.warning(f'Mismatch in meta data between raw_imaging_data folders for key {k}. ' 

389 f'Using meta_data from first folder!') 

390 return meta_data_all[0] 

391 

392 @staticmethod 

393 def _consolidate_exptQC(exptQC): 

394 """ 

395 Consolidate exptQC.mat files into a single file. 

396 

397 Parameters 

398 ---------- 

399 exptQC : list of pandas.DataFrame 

400 The loaded 'exptQC.mat' files as squeezed and simplified data frames, with columns 

401 {'frameQC_frames', 'frameQC_names'}. 

402 

403 Returns 

404 ------- 

405 numpy.array 

406 An array of uint8 where 0 indicates good frames, and other values correspond to 

407 experimenter-defined issues (in 'qc_values' column of output data frame). 

408 pandas.DataFrame 

409 A data frame with columns {'qc_values', 'qc_labels'}, the former an unsigned int 

410 corresponding to a QC code; the latter a human-readable QC explanation. 

411 numpy.array 

412 An array of frame indices where QC code != 0. 

413 """ 

414 

415 # Merge and make sure same indexes have same names across all files 

416 frameQC_names_list = [e['frameQC_names'] for e in exptQC] 

417 frameQC_names_list = [{f: 0} if isinstance(f, str) else {f[i]: i for i in range(len(f))} 

418 for f in frameQC_names_list] 

419 frameQC_names = {k: v for d in frameQC_names_list for k, v in d.items()} 

420 for d in frameQC_names_list: 

421 for k, v in d.items(): 

422 if frameQC_names[k] != v: 

423 raise IOError(f'exptQC.mat files have different values for name "{k}"') 

424 

425 frameQC_names = pd.DataFrame(sorted([(v, k) for k, v in frameQC_names.items()]), 

426 columns=['qc_values', 'qc_labels']) 

427 

428 # Concatenate frames 

429 frameQC = np.concatenate([e['frameQC_frames'] for e in exptQC], axis=0) 

430 bad_frames = np.where(frameQC != 0)[0] 

431 return frameQC, frameQC_names, bad_frames 

432 

433 def get_default_tau(self): 

434 """ 

435 Determine the tau (fluorescence decay) from the subject's genotype. 

436 

437 Returns 

438 ------- 

439 float 

440 The tau value to use. 

441 

442 See Also 

443 -------- 

444 https://suite2p.readthedocs.io/en/latest/settings.html 

445 """ 

446 # These settings are from the suite2P documentation 

447 TAU_MAP = {'G6s': 1.5, 'G6m': 1., 'G6f': .7, 'default': 1.5} 1n

448 _, subject, *_ = session_path_parts(self.session_path) 1n

449 genotype = self.one.alyx.rest('subjects', 'read', id=subject)['genotype'] 1n

450 match = next(filter(None, (re.match(r'.+-(G\d[fms])$', g['allele']) for g in genotype)), None) 1n

451 key = match.groups()[0] if match else 'default' 1n

452 return TAU_MAP.get(key, TAU_MAP['default']) 1n

453 

454 def _create_db(self, meta): 

455 """ 

456 Create the ops dictionary for suite2p based on metadata. 

457 

458 Parameters 

459 ---------- 

460 meta: dict 

461 Imaging metadata. 

462 

463 Returns 

464 ------- 

465 dict 

466 Inputs to suite2p run that deviate from default parameters. 

467 """ 

468 

469 # Computing dx and dy 

470 cXY = np.array([fov['Deg']['topLeft'] for fov in meta['FOV']]) 1e

471 cXY -= np.min(cXY, axis=0) 1e

472 nXnYnZ = np.array([fov['nXnYnZ'] for fov in meta['FOV']]) 1e

473 

474 # Currently supporting z-stacks but not supporting dual plane / volumetric imaging, assert that this is not the case 

475 if np.any(nXnYnZ[:, 2] > 1): 1e

476 raise NotImplementedError('Dual-plane imaging not yet supported, data seems to more than one plane per FOV') 

477 

478 sW = np.sqrt(np.sum((np.array([fov['Deg']['topRight'] for fov in meta['FOV']]) - np.array( 1e

479 [fov['Deg']['topLeft'] for fov in meta['FOV']])) ** 2, axis=1)) 

480 sH = np.sqrt(np.sum((np.array([fov['Deg']['bottomLeft'] for fov in meta['FOV']]) - np.array( 1e

481 [fov['Deg']['topLeft'] for fov in meta['FOV']])) ** 2, axis=1)) 

482 pixSizeX = nXnYnZ[:, 0] / sW 1e

483 pixSizeY = nXnYnZ[:, 1] / sH 1e

484 dx = np.round(cXY[:, 0] * pixSizeX).astype(dtype=np.int32) 1e

485 dy = np.round(cXY[:, 1] * pixSizeY).astype(dtype=np.int32) 1e

486 nchannels = len(meta['channelSaved']) if isinstance(meta['channelSaved'], list) else 1 1e

487 

488 # Computing number of unique z-planes (slices in tiff) 

489 # FIXME this should work if all FOVs are discrete or if all FOVs are continuous, but may not work for combination of both 

490 slice_ids = [fov['slice_id'] for fov in meta['FOV']] 1e

491 nplanes = len(set(slice_ids)) 1e

492 

493 # Figuring out how many SI Rois we have (one unique ROI may have several FOVs) 

494 # FIXME currently unused 

495 # roiUUIDs = np.array([fov['roiUUID'] for fov in meta['FOV']]) 

496 # nrois = len(np.unique(roiUUIDs)) 

497 

498 db = { 1e

499 'data_path': sorted(map(str, self.session_path.glob(f'{self.device_collection}'))), 

500 'save_path0': str(self.session_path.joinpath('alf')), 

501 'fast_disk': '', # TODO 

502 'look_one_level_down': False, # don't look in the children folders as that is where the reference data is 

503 'num_workers': self.cpu, # this selects number of cores to parallelize over for the registration step 

504 'num_workers_roi': -1, # for parallelization over FOVs during cell detection, for now don't 

505 'keep_movie_raw': False, 

506 'delete_bin': False, # TODO: delete this on the long run 

507 'batch_size': 500, # SP reduced this from 1000 

508 'nimg_init': 400, 

509 'combined': False, 

510 'nonrigid': True, 

511 'maxregshift': 0.05, # default = 1 

512 'denoise': 1, # whether binned movie should be denoised before cell detection 

513 'block_size': [128, 128], 

514 'save_mat': True, # save the data to Fall.mat 

515 'move_bin': True, # move the binary file to save_path 

516 'mesoscan': True, 

517 'nplanes': nplanes, 

518 'nrois': len(meta['FOV']), 

519 'nchannels': nchannels, 

520 'fs': meta['scanImageParams']['hRoiManager']['scanVolumeRate'], 

521 'lines': [list(np.asarray(fov['lineIdx']) - 1) for fov in meta['FOV']], # subtracting 1 to make 0-based 

522 'slices': slice_ids, # this tells us which FOV corresponds to which tiff slices 

523 'tau': self.get_default_tau(), # deduce the GCamp used from Alyx mouse line (defaults to 1.5; that of GCaMP6s) 

524 'functional_chan': 1, # for now, eventually find(ismember(meta.channelSaved == meta.channelID.green)) 

525 'align_by_chan': 1, # for now, eventually find(ismember(meta.channelSaved == meta.channelID.red)) 

526 'dx': dx, 

527 'dy': dy 

528 } 

529 

530 return db 1e

531 

532 def _run(self, run_suite2p=True, rename_files=True, use_badframes=True, **kwargs): 

533 """ 

534 Process inputs, run suite2p and make outputs alf compatible. 

535 

536 Parameters 

537 ---------- 

538 run_suite2p: bool 

539 Whether to run suite2p, default is True. 

540 rename_files: bool 

541 Whether to rename and reorganize the suite2p outputs to be alf compatible. Defaults is True. 

542 use_badframes: bool 

543 Whether to exclude bad frames indicated by the experimenter in exptQC.mat. Default is currently False 

544 due to bug in suite2p. Change this in the future. 

545 

546 Returns 

547 ------- 

548 list of pathlib.Path 

549 All files created by the task. 

550 """ 

551 import suite2p 1e

552 

553 """ Metadata and parameters """ 1e

554 # Load metadata and make sure all metadata is consistent across FOVs 

555 meta_files = sorted(self.session_path.glob(f'{self.device_collection}/*rawImagingData.meta.*')) 1e

556 collections = sorted(set(f.parts[-2] for f in meta_files)) 1e

557 # Check there is exactly 1 meta file per collection 

558 assert len(meta_files) == len(list(self.session_path.glob(self.device_collection))) == len(collections) 1e

559 rawImagingData = [mesoscope.patch_imaging_meta(alfio.load_file_content(filepath)) for filepath in meta_files] 1e

560 if len(rawImagingData) > 1: 1e

561 meta = self._check_meta_data(rawImagingData) 

562 else: 

563 meta = rawImagingData[0] 1e

564 # Get default ops 

565 ops = suite2p.default_ops() 1e

566 # Create db which overwrites ops when passed to suite2p, with information from meta data and hardcoded 

567 db = self._create_db(meta) 1e

568 # Anything can be overwritten by keyword arguments passed to the tasks run() method 

569 for k, v in kwargs.items(): 1e

570 if k in ops.keys() or k in db.keys(): 1e

571 # db overwrites ops when passed to run_s2p, so we only need to update / add it here 

572 db[k] = v 1e

573 # Update the task kwargs attribute as it will be stored in the arguments json field in alyx 

574 self.kwargs = {**self.kwargs, **db} 1e

575 

576 """ Bad frames """ 1e

577 # exptQC.mat contains experimenter QC values that may not affect ROI detection (e.g. noises, pauses) 

578 qc_paths = (self.session_path.joinpath(f[1], 'exptQC.mat') 1e

579 for f in self.input_files if f[0] == 'exptQC.mat') 

580 qc_paths = sorted(map(str, filter(Path.exists, qc_paths))) 1e

581 exptQC = [loadmat(p, squeeze_me=True, simplify_cells=True) for p in qc_paths] 1e

582 if len(exptQC) > 0: 1e

583 frameQC, frameQC_names, _ = self._consolidate_exptQC(exptQC) 

584 else: 

585 _logger.warning('No frame QC (exptQC.mat) files found.') 1e

586 frameQC = np.array([], dtype='u1') 1e

587 frameQC_names = pd.DataFrame(columns=['qc_values', 'qc_labels']) 1e

588 

589 # If applicable, save as bad_frames.npy in first raw_imaging_folder for suite2p 

590 # badframes.mat contains QC values that do affect ROI detection (e.g. no PMT, lens artefacts) 

591 badframes = np.array([], dtype='i8') 1e

592 total_frames = 0 1e

593 # Ensure all indices are relative to total cumulative frames 

594 for m, collection in zip(rawImagingData, collections): 1e

595 badframes_path = self.session_path.joinpath(collection, 'badframes.mat') 1e

596 if badframes_path.exists(): 1e

597 raw_mat = loadmat(badframes_path, squeeze_me=True, simplify_cells=True)['badframes'] 

598 badframes = np.r_[badframes, raw_mat + total_frames] 

599 total_frames += m['nFrames'] 1e

600 if len(badframes) > 0 and use_badframes is True: 1e

601 np.save(Path(db['data_path'][0]).joinpath('bad_frames.npy'), badframes) 

602 

603 """ Suite2p """ 1e

604 # Create alf it is doesn't exist 

605 self.session_path.joinpath('alf').mkdir(exist_ok=True) 1e

606 # Remove existing suite2p dir if it exists 

607 suite2p_dir = Path(db['save_path0']).joinpath('suite2p') 1e

608 if suite2p_dir.exists(): 1e

609 shutil.rmtree(str(suite2p_dir), ignore_errors=True, onerror=None) 

610 # Run suite2p 

611 if run_suite2p: 1e

612 _ = suite2p.run_s2p(ops=ops, db=db) 

613 

614 """ Outputs """ 1e

615 # Save and rename other outputs 

616 if rename_files: 1e

617 out_files = self._rename_outputs(suite2p_dir, frameQC_names, frameQC) 

618 else: 

619 out_files = list(Path(db['save_path0']).joinpath('suite2p').rglob('*')) 1e

620 # Only return output file that are in the signature (for registration) 

621 out_files = [f for f in out_files if f.name in [f[0] for f in self.output_files]] 1e

622 return out_files 1e

623 

624 

625class MesoscopeSync(base_tasks.MesoscopeTask): 

626 """Extract the frame times from the main DAQ.""" 

627 

628 priority = 40 

629 job_size = 'small' 

630 

631 @property 

632 def signature(self): 

633 signature = { 1ik

634 'input_files': [(f'_{self.sync_namespace}_DAQdata.raw.npy', self.sync_collection, True), 

635 (f'_{self.sync_namespace}_DAQdata.timestamps.npy', self.sync_collection, True), 

636 (f'_{self.sync_namespace}_DAQdata.meta.json', self.sync_collection, True), 

637 ('_ibl_rawImagingData.meta.json', self.device_collection, True), 

638 ('rawImagingData.times_scanImage.npy', self.device_collection, True, True), # register raw 

639 (f'_{self.sync_namespace}_softwareEvents.log.htsv', self.sync_collection, False), ], 

640 'output_files': [('mpci.times.npy', 'alf/mesoscope/FOV*', True), 

641 ('mpciStack.timeshift.npy', 'alf/mesoscope/FOV*', True),] 

642 } 

643 return signature 1ik

644 

645 def _run(self): 

646 """ 

647 Extract the imaging times for all FOVs. 

648 

649 Returns 

650 ------- 

651 list of pathlib.Path 

652 Files containing frame timestamps for individual FOVs and time offsets for each line scan. 

653 

654 """ 

655 # TODO function to determine nFOVs 

656 try: 1ik

657 alf_path = self.session_path / self.sync_collection 1ik

658 events = alfio.load_object(alf_path, 'softwareEvents').get('log') 1ik

659 except alferr.ALFObjectNotFound: 1i

660 events = None 1i

661 if events is None or events.empty: 1ik

662 _logger.debug('No software events found for session %s', self.session_path) 1i

663 collections = set(collection for _, collection, _ in self.input_files 1ik

664 if fnmatch(collection, self.device_collection)) 

665 # Load first meta data file to determine the number of FOVs 

666 # Changing FOV between imaging bouts is not supported currently! 

667 self.rawImagingData = alfio.load_object(self.session_path / next(iter(collections)), 'rawImagingData') 1ik

668 self.rawImagingData['meta'] = mesoscope.patch_imaging_meta(self.rawImagingData['meta']) 1ik

669 n_FOVs = len(self.rawImagingData['meta']['FOV']) 1ik

670 sync, chmap = self.load_sync() # Extract sync data from raw DAQ data 1ik

671 mesosync = mesoscope.MesoscopeSyncTimeline(self.session_path, n_FOVs) 1ik

672 _, out_files = mesosync.extract( 1ik

673 save=True, sync=sync, chmap=chmap, device_collection=collections, events=events) 

674 return out_files 1ik

675 

676 

677class MesoscopeFOV(base_tasks.MesoscopeTask): 

678 """Create FOV and FOV location objects in Alyx from metadata.""" 

679 

680 priority = 40 

681 job_size = 'small' 

682 

683 @property 

684 def signature(self): 

685 signature = { 1g

686 'input_files': [('_ibl_rawImagingData.meta.json', self.device_collection, True), 

687 ('mpciROIs.stackPos.npy', 'alf/FOV*', True)], 

688 'output_files': [('mpciMeanImage.brainLocationIds*.npy', 'alf/FOV_*', True), 

689 ('mpciMeanImage.mlapdv*.npy', 'alf/FOV_*', True), 

690 ('mpciROIs.mlapdv*.npy', 'alf/FOV_*', True), 

691 ('mpciROIs.brainLocationIds*.npy', 'alf/FOV_*', True), 

692 ('_ibl_rawImagingData.meta.json', self.device_collection, True)] 

693 } 

694 return signature 1g

695 

696 def _run(self, *args, provenance=Provenance.ESTIMATE, **kwargs): 

697 """ 

698 Register fields of view (FOV) to Alyx and extract the coordinates and IDs of each ROI. 

699 

700 Steps: 

701 1. Save the mpciMeanImage.brainLocationIds_estimate and mlapdv datasets. 

702 2. Use mean image coordinates and ROI stack position datasets to extract brain location 

703 of each ROI. 

704 3. Register the location of each FOV in Alyx. 

705 

706 Parameters 

707 ---------- 

708 provenance : Provenance 

709 The provenance of the coordinates in the meta file. For all but 'HISTOLOGY', the 

710 provenance is added as a dataset suffix. Defaults to ESTIMATE. 

711 

712 Returns 

713 ------- 

714 dict 

715 The newly created FOV Alyx record. 

716 list 

717 The newly created FOV location Alyx records. 

718 

719 Notes 

720 ----- 

721 - Once the FOVs have been registered they cannot be updated with this task. Rerunning this 

722 task will result in an error. 

723 - This task modifies the first meta JSON file. All meta files are registered by this task. 

724 """ 

725 # Load necessary data 

726 (filename, collection, _), *_ = self.signature['input_files'] 1g

727 meta_files = sorted(self.session_path.glob(f'{collection}/{filename}')) 1g

728 meta = mesoscope.patch_imaging_meta(alfio.load_file_content(meta_files[0]) or {}) 1g

729 nFOV = len(meta.get('FOV', [])) 1g

730 

731 suffix = None if provenance is Provenance.HISTOLOGY else provenance.name.lower() 1g

732 _logger.info('Extracting %s MLAPDV datasets', suffix or 'final') 1g

733 

734 # Extract mean image MLAPDV coordinates and brain location IDs 

735 mean_image_mlapdv, mean_image_ids = self.project_mlapdv(meta) 1g

736 

737 # Save the meta data file with new coordinate fields 

738 with open(meta_files[0], 'w') as fp: 1g

739 json.dump(meta, fp) 1g

740 

741 # Save the mean image datasets 

742 mean_image_files = [] 1g

743 assert set(mean_image_mlapdv.keys()) == set(mean_image_ids.keys()) and len(mean_image_ids) == nFOV 1g

744 for i in range(nFOV): 1g

745 alf_path = self.session_path.joinpath('alf', f'FOV_{i:02}') 1g

746 for attr, arr, sfx in (('mlapdv', mean_image_mlapdv[i], suffix), 1g

747 ('brainLocationIds', mean_image_ids[i], ('ccf', '2017', suffix))): 

748 mean_image_files.append(alf_path / to_alf('mpciMeanImage', attr, 'npy', timescale=sfx)) 1g

749 np.save(mean_image_files[-1], arr) 1g

750 

751 # Extract ROI MLAPDV coordinates and brain location IDs 

752 roi_mlapdv, roi_brain_ids = self.roi_mlapdv(nFOV, suffix=suffix) 1g

753 

754 # Write MLAPDV + brain location ID of ROIs to disk 

755 roi_files = [] 1g

756 assert set(roi_mlapdv.keys()) == set(roi_brain_ids.keys()) and len(roi_mlapdv) == nFOV 1g

757 for i in range(nFOV): 1g

758 alf_path = self.session_path.joinpath('alf', f'FOV_{i:02}') 1g

759 for attr, arr, sfx in (('mlapdv', roi_mlapdv[i], suffix), 1g

760 ('brainLocationIds', roi_brain_ids[i], ('ccf', '2017', suffix))): 

761 roi_files.append(alf_path / to_alf('mpciROIs', attr, 'npy', timescale=sfx)) 1g

762 np.save(roi_files[-1], arr) 1g

763 

764 # Register FOVs in Alyx 

765 self.register_fov(meta, suffix) 1g

766 

767 return sorted([*meta_files, *roi_files, *mean_image_files]) 1g

768 

769 def update_surgery_json(self, meta, normal_vector): 

770 """ 

771 Update surgery JSON with surface normal vector. 

772 

773 Adds the key 'surface_normal_unit_vector' to the most recent surgery JSON, containing the 

774 provided three element vector. The recorded craniotomy center must match the coordinates 

775 in the provided meta file. 

776 

777 Parameters 

778 ---------- 

779 meta : dict 

780 The imaging meta data file containing the 'centerMM' key. 

781 normal_vector : array_like 

782 A three element unit vector normal to the surface of the craniotomy center. 

783 

784 Returns 

785 ------- 

786 dict 

787 The updated surgery record, or None if no surgeries found. 

788 """ 

789 if not self.one or self.one.offline: 1jb

790 _logger.warning('failed to update surgery JSON: ONE offline') 1jb

791 return 1jb

792 # Update subject JSON with unit normal vector of craniotomy centre (used in histology) 

793 subject = self.one.path2ref(self.session_path, parse=False)['subject'] 1j

794 surgeries = self.one.alyx.rest('surgeries', 'list', subject=subject, procedure='craniotomy') 1j

795 if not surgeries: 1j

796 _logger.error(f'Surgery not found for subject "{subject}"') 1j

797 return 1j

798 surgery = surgeries[0] # Check most recent surgery in list 1j

799 center = (meta['centerMM']['ML'], meta['centerMM']['AP']) 1j

800 match = (k for k, v in surgery['json'].items() if 1j

801 str(k).startswith('craniotomy') and np.allclose(v['center'], center)) 

802 if (key := next(match, None)) is None: 1j

803 _logger.error('Failed to update surgery JSON: no matching craniotomy found') 1j

804 return surgery 1j

805 data = {key: {**surgery['json'][key], 'surface_normal_unit_vector': tuple(normal_vector)}} 1j

806 surgery['json'] = self.one.alyx.json_field_update('subjects', subject, data=data) 1j

807 return surgery 1j

808 

809 def roi_mlapdv(self, nFOV: int, suffix=None): 

810 """ 

811 Extract ROI MLAPDV coordinates and brain location IDs. 

812 

813 MLAPDV coordinates are in um relative to bregma. Location IDs are from the 2017 Allen 

814 common coordinate framework atlas. 

815 

816 Parameters 

817 ---------- 

818 nFOV : int 

819 The number of fields of view acquired. 

820 suffix : {None, 'estimate'} 

821 The attribute suffix of the mpciMeanImage datasets to load. If generating from 

822 estimates, the suffix should be 'estimate'. 

823 

824 Returns 

825 ------- 

826 dict of int : numpy.array 

827 A map of field of view to ROI MLAPDV coordinates. 

828 dict of int : numpy.array 

829 A map of field of view to ROI brain location IDs. 

830 """ 

831 all_mlapdv = {} 1g

832 all_brain_ids = {} 1g

833 for n in range(nFOV): 1g

834 alf_path = self.session_path.joinpath('alf', f'FOV_{n:02}') 1g

835 

836 # Load neuron centroids in pixel space 

837 stack_pos_file = next(alf_path.glob('mpciROIs.stackPos*'), None) 1g

838 if not stack_pos_file: 1g

839 raise FileNotFoundError(alf_path / 'mpci.stackPos*') 1g

840 stack_pos = alfio.load_file_content(stack_pos_file) 1g

841 

842 # Load MLAPDV + brain location ID maps of pixels 

843 mpciMeanImage = alfio.load_object( 1g

844 alf_path, 'mpciMeanImage', attribute=['mlapdv', 'brainLocationIds']) 

845 

846 # Get centroid MLAPDV + brainID by indexing pixel-map with centroid locations 

847 mlapdv = np.full(stack_pos.shape, np.nan) 1g

848 brain_ids = np.full(stack_pos.shape[0], np.nan) 1g

849 for i in np.arange(stack_pos.shape[0]): 1g

850 idx = (stack_pos[i, 0], stack_pos[i, 1]) 1g

851 sfx = f'_{suffix}' if suffix else '' 1g

852 mlapdv[i, :] = mpciMeanImage['mlapdv' + sfx][idx] 1g

853 brain_ids[i] = mpciMeanImage['brainLocationIds_ccf_2017' + sfx][idx] 1g

854 assert ~np.isnan(brain_ids).any() 1g

855 all_brain_ids[n] = brain_ids.astype(int) 1g

856 all_mlapdv[n] = mlapdv 1g

857 

858 return all_mlapdv, all_brain_ids 1g

859 

860 @staticmethod 

861 def get_provenance(filename): 

862 """ 

863 Get the field of view provenance from a mpciMeanImage or mpciROIs dataset. 

864 

865 Parameters 

866 ---------- 

867 filename : str, pathlib.Path 

868 A filename to get the provenance from. 

869 

870 Returns 

871 ------- 

872 Provenance 

873 The provenance of the file. 

874 """ 

875 filename = Path(filename).name 1o

876 timescale = (filename_parts(filename)[3] or '').split('_') 1o

877 provenances = [i.name.lower() for i in Provenance] 1o

878 provenance = (Provenance[x.upper()] for x in timescale if x in provenances) 1o

879 return next(provenance, None) or Provenance.HISTOLOGY 1o

880 

881 def register_fov(self, meta: dict, suffix: str = None) -> (list, list): 

882 """ 

883 Create FOV on Alyx. 

884 

885 Assumes field of view recorded perpendicular to objective. 

886 Assumes field of view is plane (negligible volume). 

887 

888 Required Alyx fixtures: 

889 - experiments.ImagingType(name='mesoscope') 

890 - experiments.CoordinateSystem(name='IBL-Allen') 

891 

892 Parameters 

893 ---------- 

894 meta : dict 

895 The raw imaging meta data from _ibl_rawImagingData.meta.json. 

896 suffix : str 

897 The file attribute suffixes to load from the mpciMeanImage object. Either 'estimate' or 

898 None. No suffix means the FOV location provenance will be L (Landmark). 

899 

900 Returns 

901 ------- 

902 list of dict 

903 A list registered of field of view entries from Alyx. 

904 

905 TODO Determine dual plane ID for JSON field 

906 """ 

907 dry = self.one is None or self.one.offline 1h

908 alyx_fovs = [] 1h

909 # Count the number of slices per stack ID: only register stacks that contain more than one slice. 

910 slice_counts = Counter(f['roiUUID'] for f in meta.get('FOV', [])) 1h

911 # Create a new stack in Alyx for all stacks containing more than one slice. 

912 # Map of ScanImage ROI UUID to Alyx ImageStack UUID. 

913 if dry: 1h

914 stack_ids = {i: uuid.uuid4() for i in slice_counts if slice_counts[i] > 1} 1h

915 else: 

916 stack_ids = {i: self.one.alyx.rest('imaging-stack', 'create', data={'name': i})['id'] 1h

917 for i in slice_counts if slice_counts[i] > 1} 

918 

919 for i, fov in enumerate(meta.get('FOV', [])): 1h

920 assert set(fov.keys()) >= {'MLAPDV', 'nXnYnZ', 'roiUUID'} 1h

921 # Field of view 

922 alyx_FOV = { 1h

923 'session': self.session_path.as_posix() if dry else self.path2eid(), 

924 'imaging_type': 'mesoscope', 'name': f'FOV_{i:02}', 

925 'stack': stack_ids.get(fov['roiUUID']) 

926 } 

927 if dry: 1h

928 print(alyx_FOV) 1h

929 alyx_FOV['location'] = [] 1h

930 alyx_fovs.append(alyx_FOV) 1h

931 else: 

932 alyx_fovs.append(self.one.alyx.rest('fields-of-view', 'create', data=alyx_FOV)) 1h

933 

934 # Field of view location 

935 data = {'field_of_view': alyx_fovs[-1].get('id'), 1h

936 'default_provenance': True, 

937 'coordinate_system': 'IBL-Allen', 

938 'n_xyz': fov['nXnYnZ']} 

939 if suffix: 1h

940 data['provenance'] = suffix[0].upper() 1h

941 

942 # Convert coordinates to 4 x 3 array (n corners by n dimensions) 

943 # x1 = top left ml, y1 = top left ap, y2 = top right ap, etc. 

944 coords = [fov['MLAPDV'][key] for key in ('topLeft', 'topRight', 'bottomLeft', 'bottomRight')] 1h

945 coords = np.vstack(coords).T 1h

946 data.update({k: arr.tolist() for k, arr in zip('xyz', coords)}) 1h

947 

948 # Load MLAPDV + brain location ID maps of pixels 

949 filename = 'mpciMeanImage.brainLocationIds_ccf_2017' + (f'_{suffix}' if suffix else '') + '.npy' 1h

950 filepath = self.session_path.joinpath('alf', f'FOV_{i:02}', filename) 1h

951 mean_image_ids = alfio.load_file_content(filepath) 1h

952 

953 data['brain_region'] = np.unique(mean_image_ids).astype(int).tolist() 1h

954 

955 if dry: 1h

956 print(data) 1h

957 alyx_FOV['location'].append(data) 1h

958 else: 

959 alyx_fovs[-1]['location'].append(self.one.alyx.rest('fov-location', 'create', data=data)) 1h

960 return alyx_fovs 1h

961 

962 def load_triangulation(self): 

963 """ 

964 Load the surface triangulation file. 

965 

966 A triangle mesh of the smoothed convex hull of the dorsal surface of the mouse brain, 

967 generated from the 2017 Allen 10um annotation volume. This triangulation was generated in 

968 MATLAB. 

969 

970 Returns 

971 ------- 

972 points : numpy.array 

973 An N by 3 float array of x-y vertices, defining all points of the triangle mesh. These 

974 are in um relative to the IBL bregma coordinates. 

975 connectivity_list : numpy.array 

976 An N by 3 integer array of vertex indices defining all points that form a triangle. 

977 """ 

978 fixture_path = Path(mesoscope.__file__).parent.joinpath('mesoscope') 1b

979 surface_triangulation = np.load(fixture_path / 'surface_triangulation.npz') 1b

980 points = surface_triangulation['points'].astype('f8') 1b

981 connectivity_list = surface_triangulation['connectivity_list'] 1b

982 surface_triangulation.close() 1b

983 return points, connectivity_list 1b

984 

985 def project_mlapdv(self, meta, atlas=None): 

986 """ 

987 Calculate the mean image pixel locations in MLAPDV coordinates and determine the brain 

988 location IDs. 

989 

990 MLAPDV coordinates are in um relative to bregma. Location IDs are from the 2017 Allen 

991 common coordinate framework atlas. 

992 

993 Parameters 

994 ---------- 

995 meta : dict 

996 The raw imaging data meta file, containing coordinates for the centre of each field of 

997 view. 

998 atlas : ibllib.atlas.Atlas 

999 An atlas instance. 

1000 

1001 Returns 

1002 ------- 

1003 dict 

1004 A map of FOV number (int) to mean image MLAPDV coordinates as a 2D numpy array. 

1005 dict 

1006 A map of FOV number (int) to mean image brain location IDs as a 2D numpy int array. 

1007 """ 

1008 mlapdv = {} 1b

1009 location_id = {} 1b

1010 # Use the MRI atlas as this applies scaling, particularly along the DV axis to (hopefully) 

1011 # more accurately represent the living brain. 

1012 atlas = atlas or MRITorontoAtlas(res_um=10) 1b

1013 # The centre of the craniotomy / imaging window 

1014 coord_ml = meta['centerMM']['ML'] * 1e3 # mm -> um 1b

1015 coord_ap = meta['centerMM']['AP'] * 1e3 # mm -> um 1b

1016 pt = np.array([coord_ml, coord_ap]) 1b

1017 

1018 points, connectivity_list = self.load_triangulation() 1b

1019 # Only keep faces that have normals pointing up (positive DV value). 

1020 # Calculate the normal vector pointing out of the convex hull. 

1021 triangles = points[connectivity_list, :] 1b

1022 normals = surface_normal(triangles) 1b

1023 up_faces, = np.where(normals[:, -1] > 0) 1b

1024 # only keep triangles that have normal vector with positive DV component 

1025 dorsal_connectivity_list = connectivity_list[up_faces, :] 1b

1026 # Flatten triangulation by dropping the dorsal coordinates and find the location of the 

1027 # window center (we convert mm -> um here) 

1028 face_ind = find_triangle(pt * 1e-3, points[:, :2] * 1e-3, dorsal_connectivity_list.astype(np.intp)) 1b

1029 assert face_ind != -1 1b

1030 

1031 dorsal_triangle = points[dorsal_connectivity_list[face_ind, :], :] 1b

1032 

1033 # Get the surface normal unit vector of dorsal triangle 

1034 normal_vector = surface_normal(dorsal_triangle) 1b

1035 

1036 # Update the surgery JSON field with normal unit vector, for use in histology alignment 

1037 self.update_surgery_json(meta, normal_vector) 1b

1038 

1039 # find the coordDV that sits on the triangular face and had [coordML, coordAP] coordinates; 

1040 # the three vertices defining the triangle 

1041 face_vertices = points[dorsal_connectivity_list[face_ind, :], :] 1b

1042 

1043 # all the vertices should be on the plane ax + by + cz = 1, so we can find 

1044 # the abc coefficients by inverting the three equations for the three vertices 

1045 abc, *_ = np.linalg.lstsq(face_vertices, np.ones(3), rcond=None) 1b

1046 

1047 # and then find a point on that plane that corresponds to a given x-y 

1048 # coordinate (which is ML-AP coordinate) 

1049 coord_dv = (1 - pt @ abc[:2]) / abc[2] 1b

1050 

1051 # We should not use the actual surface of the brain for this, as it might be in one of the sulci 

1052 # DO NOT USE THIS: 

1053 # coordDV = interp2(axisMLmm, axisAPmm, surfaceDV, coordML, coordAP) 

1054 

1055 # Now we need to span the plane of the coverslip with two orthogonal unit vectors. 

1056 # We start with vY, because the order is important and we usually have less 

1057 # tilt along AP (pitch), which will cause less deviation in vX from pure ML. 

1058 vY = np.array([0, normal_vector[2], -normal_vector[1]]) # orthogonal to the normal of the plane 1b

1059 vX = np.cross(vY, normal_vector) # orthogonal to n and to vY 1b

1060 # normalize and flip the sign if necessary 

1061 vX = vX / np.sqrt(vX @ vX) * np.sign(vX[0]) # np.sqrt(vY @ vY) == LR norm of vX 1b

1062 vY = vY / np.sqrt(vY @ vY) * np.sign(vY[1]) 1b

1063 

1064 # what are the dimensions of the data arrays (ap, ml, dv) 

1065 (nAP, nML, nDV) = atlas.image.shape 1b

1066 # Let's shift the coordinates relative to bregma 

1067 voxel_size = atlas.res_um # [um] resolution of the atlas 1b

1068 bregma_coords = ALLEN_CCF_LANDMARKS_MLAPDV_UM['bregma'] / voxel_size # (ml, ap, dv) 1b

1069 axis_ml_um = (np.arange(nML) - bregma_coords[0]) * voxel_size 1b

1070 axis_ap_um = (np.arange(nAP) - bregma_coords[1]) * voxel_size * -1. 1b

1071 axis_dv_um = (np.arange(nDV) - bregma_coords[2]) * voxel_size * -1. 1b

1072 

1073 # projection of FOVs on the brain surface to get ML-AP-DV coordinates 

1074 _logger.info('Projecting in 3D') 1b

1075 for i, fov in enumerate(meta['FOV']): # i, fov = next(enumerate(meta['FOV'])) 1b

1076 start_time = time.time() 1b

1077 _logger.info(f'FOV {i + 1}/{len(meta["FOV"])}') 1b

1078 y_px_idx, x_px_idx = np.mgrid[0:fov['nXnYnZ'][0], 0:fov['nXnYnZ'][1]] 1b

1079 

1080 # xx and yy are in mm in coverslip space 

1081 points = ((0, fov['nXnYnZ'][0] - 1), (0, fov['nXnYnZ'][1] - 1)) 1b

1082 # The four corners of the FOV, determined by taking the center of the craniotomy in MM, 

1083 # the x-y coordinates of the imaging window center (from the tiled reference image) in 

1084 # galvanometer units, and the x-y coordinates of the FOV center in galvanometer units. 

1085 values = [[fov['MM']['topLeft'][0], fov['MM']['topRight'][0]], 1b

1086 [fov['MM']['bottomLeft'][0], fov['MM']['bottomRight'][0]]] 

1087 values = np.array(values) * 1e3 # mm -> um 1b

1088 xx = interpn(points, values, (y_px_idx, x_px_idx)) 1b

1089 

1090 values = [[fov['MM']['topLeft'][1], fov['MM']['topRight'][1]], 1b

1091 [fov['MM']['bottomLeft'][1], fov['MM']['bottomRight'][1]]] 

1092 values = np.array(values) * 1e3 # mm -> um 1b

1093 yy = interpn(points, values, (y_px_idx, x_px_idx)) 1b

1094 

1095 xx = xx.flatten() - coord_ml 1b

1096 yy = yy.flatten() - coord_ap 1b

1097 

1098 # rotate xx and yy in 3D 

1099 # the coords are still on the coverslip, but now have 3D values 

1100 coords = np.outer(xx, vX) + np.outer(yy, vY) # (vX * xx) + (vY * yy) 1b

1101 coords = coords + [coord_ml, coord_ap, coord_dv] 1b

1102 

1103 # for each point of the FOV create a line parametrization (trajectory normal to the coverslip plane). 

1104 # start just above the coverslip and go 3 mm down, should be enough to 'meet' the brain 

1105 t = np.arange(-voxel_size, 3e3, voxel_size) 1b

1106 

1107 # Find the MLAPDV atlas coordinate and brain location of each pixel. 

1108 MLAPDV, annotation = _update_points( 1b

1109 t, normal_vector, coords, axis_ml_um, axis_ap_um, axis_dv_um, atlas.label) 

1110 annotation = atlas.regions.index2id(annotation) # convert annotation indices to IDs 1b

1111 

1112 if np.any(np.isnan(MLAPDV)): 1b

1113 _logger.warning('Areas of FOV lie outside the brain') 1b

1114 _logger.info(f'done ({time.time() - start_time:3.1f} seconds)\n') 1b

1115 MLAPDV = np.reshape(MLAPDV, [*x_px_idx.shape, 3]) 1b

1116 annotation = np.reshape(annotation, x_px_idx.shape) 1b

1117 

1118 fov['MLAPDV'] = { 1b

1119 'topLeft': MLAPDV[0, 0, :].tolist(), 

1120 'topRight': MLAPDV[0, -1, :].tolist(), 

1121 'bottomLeft': MLAPDV[-1, 0, :].tolist(), 

1122 'bottomRight': MLAPDV[-1, -1, :].tolist(), 

1123 'center': MLAPDV[round(x_px_idx.shape[0] / 2) - 1, round(x_px_idx.shape[1] / 2) - 1, :].tolist() 

1124 } 

1125 

1126 # Save the brain regions of the corners/centers of FOV (annotation field) 

1127 fov['brainLocationIds'] = { 1b

1128 'topLeft': int(annotation[0, 0]), 

1129 'topRight': int(annotation[0, -1]), 

1130 'bottomLeft': int(annotation[-1, 0]), 

1131 'bottomRight': int(annotation[-1, -1]), 

1132 'center': int(annotation[round(x_px_idx.shape[0] / 2) - 1, round(x_px_idx.shape[1] / 2) - 1]) 

1133 } 

1134 

1135 mlapdv[i] = MLAPDV 1b

1136 location_id[i] = annotation 1b

1137 return mlapdv, location_id 1b

1138 

1139 

1140def surface_normal(triangle): 

1141 """ 

1142 Calculate the surface normal unit vector of one or more triangles. 

1143 

1144 Parameters 

1145 ---------- 

1146 triangle : numpy.array 

1147 An array of shape (n_triangles, 3, 3) representing (Px Py Pz). 

1148 

1149 Returns 

1150 ------- 

1151 numpy.array 

1152 The surface normal unit vector(s). 

1153 """ 

1154 if triangle.shape == (3, 3): 1lb

1155 triangle = triangle[np.newaxis, :, :] 1lb

1156 if triangle.shape[1:] != (3, 3): 1lb

1157 raise ValueError('expected array of shape (3, 3); 3 coordinates in x, y, and z') 1l

1158 V = triangle[:, 1, :] - triangle[:, 0, :] # V = P2 - P1 1lb

1159 W = triangle[:, 2, :] - triangle[:, 0, :] # W = P3 - P1 1lb

1160 

1161 Nx = (V[:, 1] * W[:, 2]) - (V[:, 2] * W[:, 1]) # Nx = (Vy * Wz) - (Vz * Wy) 1lb

1162 Ny = (V[:, 2] * W[:, 0]) - (V[:, 0] * W[:, 2]) # Ny = (Vz * Wx) - (Vx * Wz) 1lb

1163 Nz = (V[:, 0] * W[:, 1]) - (V[:, 1] * W[:, 0]) # Nz = (Vx * Wy) - (Vy * Wx) 1lb

1164 N = np.c_[Nx, Ny, Nz] 1lb

1165 # Calculate unit vector. Transpose allows vectorized operation. 

1166 A = N / np.sqrt((Nx ** 2) + (Ny ** 2) + (Nz ** 2))[np.newaxis].T 1lb

1167 return A.squeeze() 1lb

1168 

1169 

1170@nb.njit('b1(f8[:,:], f8[:])') 

1171def in_triangle(triangle, point): 

1172 """ 

1173 Check whether `point` lies within `triangle`. 

1174 

1175 Parameters 

1176 ---------- 

1177 triangle : numpy.array 

1178 A (2 x 3) array of x-y coordinates; A(x1, y1), B(x2, y2) and C(x3, y3). 

1179 point : numpy.array 

1180 A point, P(x, y). 

1181 

1182 Returns 

1183 ------- 

1184 bool 

1185 True if coordinate lies within triangle. 

1186 """ 

1187 def area(x1, y1, x2, y2, x3, y3): 

1188 """Calculate the area of a triangle, given its vertices.""" 

1189 return abs((x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)) / 2.) 

1190 

1191 x1, y1, x2, y2, x3, y3 = triangle.flat 

1192 x, y = point 

1193 A = area(x1, y1, x2, y2, x3, y3) # area of triangle ABC 

1194 A1 = area(x, y, x2, y2, x3, y3) # area of triangle PBC 

1195 A2 = area(x1, y1, x, y, x3, y3) # area of triangle PAC 

1196 A3 = area(x1, y1, x2, y2, x, y) # area of triangle PAB 

1197 # Check if sum of A1, A2 and A3 equals that of A 

1198 diff = np.abs((A1 + A2 + A3) - A) 

1199 REL_TOL = 1e-9 

1200 return diff <= np.abs(REL_TOL * A) # isclose not yet implemented in numba 0.57 

1201 

1202 

1203@nb.njit('i8(f8[:], f8[:,:], intp[:,:])', nogil=True) 

1204def find_triangle(point, vertices, connectivity_list): 

1205 """ 

1206 Find which vertices contain a given point. 

1207 

1208 Currently O(n) but could take advantage of connectivity order to be quicker. 

1209 

1210 Parameters 

1211 ---------- 

1212 point : numpy.array 

1213 The (x, y) coordinate of a point to locate within one of the triangles. 

1214 vertices : numpy.array 

1215 An N x 3 array of vertices representing a triangle mesh. 

1216 connectivity_list : numpy.array 

1217 An N x 3 array of indices representing the connectivity of `points`. 

1218 

1219 Returns 

1220 ------- 

1221 int 

1222 The index of the vertices containing `point`, or -1 if not within any triangle. 

1223 """ 

1224 face_ind = -1 

1225 for i in nb.prange(connectivity_list.shape[0]): 

1226 triangle = vertices[connectivity_list[i, :], :] 

1227 if in_triangle(triangle, point): 

1228 face_ind = i 

1229 break 

1230 return face_ind 

1231 

1232 

1233@nb.njit('Tuple((f8[:], intp[:]))(f8[:], f8[:])', nogil=True) 

1234def _nearest_neighbour_1d(x, x_new): 

1235 """ 

1236 Nearest neighbour interpolation with extrapolation. 

1237 

1238 This was adapted from scipy.interpolate.interp1d but returns the indices of each nearest x 

1239 value. Assumes x is not sorted. 

1240 

1241 Parameters 

1242 ---------- 

1243 x : (N,) array_like 

1244 A 1-D array of real values. 

1245 x_new : (N,) array_like 

1246 A 1D array of values to apply function to. 

1247 

1248 Returns 

1249 ------- 

1250 numpy.array 

1251 A 1D array of interpolated values. 

1252 numpy.array 

1253 A 1D array of indices. 

1254 """ 

1255 SIDE = 'left' # use 'right' to round up to nearest int instead of rounding down 

1256 # Sort values 

1257 ind = np.argsort(x, kind='mergesort') 

1258 x = x[ind] 

1259 x_bds = x / 2.0 # Do division before addition to prevent possible integer overflow 

1260 x_bds = x_bds[1:] + x_bds[:-1] 

1261 # Find where in the averaged data the values to interpolate would be inserted. 

1262 x_new_indices = np.searchsorted(x_bds, x_new, side=SIDE) 

1263 # Clip x_new_indices so that they are within the range of x indices. 

1264 x_new_indices = x_new_indices.clip(0, len(x) - 1).astype(np.intp) 

1265 # Calculate the actual value for each entry in x_new. 

1266 y_new = x[x_new_indices] 

1267 return y_new, ind[x_new_indices] 

1268 

1269 

1270@nb.njit('Tuple((f8[:,:], u2[:]))(f8[:], f8[:], f8[:,:], f8[:], f8[:], f8[:], u2[:,:,:])', nogil=True) 

1271def _update_points(t, normal_vector, coords, axis_ml_um, axis_ap_um, axis_dv_um, atlas_labels): 

1272 """ 

1273 Determine the MLAPDV coordinate and brain location index for each of the given coordinates. 

1274 

1275 This has been optimized in numba. The majority of the time savings come from replacing iterp1d 

1276 and ismember with _nearest_neighbour_1d which were extremely slow. Parallel iteration further 

1277 halved the time it took per 512x512 FOV. 

1278 

1279 Parameters 

1280 ---------- 

1281 t : numpy.array 

1282 An N x 3 evenly spaced set of coordinates representing points going down from the coverslip 

1283 towards the brain. 

1284 normal_vector : numpy.array 

1285 The unit vector of the face normal to the center of the window. 

1286 coords : numpy.array 

1287 A set of N x 3 coordinates representing the MLAPDV coordinates of each pixel relative to 

1288 the center of the window, in micrometers (um). 

1289 axis_ml_um : numpy.array 

1290 An evenly spaced array of medio-lateral brain coordinates relative to bregma in um, at the 

1291 resolution of the atlas image used. 

1292 axis_ap_um : numpy.array 

1293 An evenly spaced array of anterio-posterior brain coordinates relative to bregma in um, at 

1294 the resolution of the atlas image used. 

1295 axis_dv_um : numpy.array 

1296 An evenly spaced array of dorso-ventral brain coordinates relative to bregma in um, at 

1297 the resolution of the atlas image used. 

1298 atlas_labels : numpy.array 

1299 A 3D array of integers representing the brain location index of each voxel of a given 

1300 atlas. The shape is expected to be (nAP, nML, nDV). 

1301 

1302 Returns 

1303 ------- 

1304 numpy.array 

1305 An N by 3 array containing the MLAPDV coordinates in um of each pixel coordinate. 

1306 Coordinates outside of the brain are NaN. 

1307 numpy.array 

1308 A 1D array of atlas label indices the length of `coordinates`. 

1309 """ 

1310 # passing through the center of the craniotomy/coverslip 

1311 traj_coords_centered = np.outer(t, -normal_vector) 

1312 MLAPDV = np.full_like(coords, np.nan) 

1313 annotation = np.zeros(coords.shape[0], dtype=np.uint16) 

1314 n_points = coords.shape[0] 

1315 for p in nb.prange(n_points): 

1316 # Shifted to the correct point on the coverslip, in true ML-AP-DV coords 

1317 traj_coords = traj_coords_centered + coords[p, :] 

1318 

1319 # Find intersection coordinate with the brain. 

1320 # Only use coordinates that exist in the atlas (kind of nearest neighbour interpolation) 

1321 ml, ml_idx = _nearest_neighbour_1d(axis_ml_um, traj_coords[:, 0]) 

1322 ap, ap_idx = _nearest_neighbour_1d(axis_ap_um, traj_coords[:, 1]) 

1323 dv, dv_idx = _nearest_neighbour_1d(axis_dv_um, traj_coords[:, 2]) 

1324 

1325 # Iterate over coordinates to find the first (if any) that is within the brain 

1326 ind = -1 

1327 area = 0 # 0 = void; 1 = root 

1328 for i in nb.prange(traj_coords.shape[0]): 

1329 anno = atlas_labels[ap_idx[i], ml_idx[i], dv_idx[i]] 

1330 if anno > 0: # first coordinate in the brain 

1331 ind = i 

1332 area = anno 

1333 if area > 1: # non-root brain area; we're done 

1334 break 

1335 if area > 1: 

1336 point = traj_coords[ind, :] 

1337 MLAPDV[p, :] = point # in um 

1338 annotation[p] = area 

1339 else: 

1340 MLAPDV[p, :] = np.nan 

1341 annotation[p] = area # root or void 

1342 

1343 return MLAPDV, annotation