Coverage for ibllib/pipes/purge_rig_data.py: 0%

47 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-11 11:13 +0100

1#!/usr/bin/env python 

2# -*- coding:utf-8 -*- 

3# @Author: Niccolò Bonacchi 

4# @Date: Thursday, March 28th 2019, 7:53:44 pm 

5""" 

6Purge data from RIG 

7- Find all files by rglob 

8- Find all sessions of the found files 

9- Check Alyx if corresponding datasetTypes have been registered as existing 

10sessions and files on Flatiron 

11- Delete local raw file if found on Flatiron 

12""" 

13import argparse 

14from pathlib import Path 

15 

16from one.api import ONE 

17from one.alf.files import get_session_path 

18 

19 

def session_name(path) -> str:
    """Return the session name string (subject/date/number) for any file path.

    The session folder is resolved with ``get_session_path`` and its last
    three path components are joined with forward slashes.
    """
    session_parts = get_session_path(path).parts
    return '/'.join(session_parts[-3:])

24 

25 

def purge_local_data(local_folder, file_name, lab=None, dry=False):
    """Delete local copies of a file for every session that has it on Flatiron.

    Recursively finds the files whose name ends with ``file_name`` under
    ``local_folder``, asks Alyx for the sessions holding the corresponding
    dataset type, and removes each local file whose session name
    (subject/date/number) appears in one of the returned data URLs.

    :param local_folder: root folder that is searched recursively
    :param file_name: file name or path; only the final path component is used
    :param lab: optional lab name forwarded to the Alyx search; left out of
        the query when None
    :param dry: when True, only print the files that would be removed
    :return: None
    """
    # Figure out datasetType from file_name or file path: it is made of the
    # first two dot-separated parts of the file name
    file_name = Path(file_name).name
    dstype = '.'.join(file_name.split('.')[:2])
    print(f'Looking for file <{file_name}> in folder <{local_folder}>')
    # Get all paths for file_name in local folder
    files = list(Path(local_folder).rglob(f'*{file_name}'))
    print(f'Found {len(files)} files')
    print(f'Checking on Flatiron for datsetType: {dstype}...')
    # Get all sessions and details from Alyx that have the dstype
    one = ONE(cache_rest=None)
    search_kwargs = {} if lab is None else {'lab': lab}
    _, det = one.search(dataset_types=[dstype], details=True, **search_kwargs)
    # Collect the data URLs of the matching datasets, skipping None answers
    # (session is registered but the dstype is not there yet)
    urls = []
    for d in det:
        urls.extend(
            x['data_url'] for x in d['data_dataset_session_related']
            if x['dataset_type'] == dstype and x['data_url'] is not None
        )
    print(f'Found files on Flatiron: {len(urls)}')
    # Queue each local file at most once, even when several URLs match its
    # session: a duplicated entry would make the second unlink() fail with
    # FileNotFoundError and would inflate the reported count
    to_remove = []
    for f in files:
        sess_name = session_name(f)
        if any(sess_name in u for u in urls):
            to_remove.append(f)
    print(f'Local files to remove: {len(to_remove)}')
    for f in to_remove:
        print(f)
        if not dry:
            f.unlink()

64 

65 

if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the purge
    cli = argparse.ArgumentParser(description='Delete files from rig')
    cli.add_argument('folder', help='Local iblrig_data folder')
    cli.add_argument(
        'file', help='File name to search and destroy for every session')
    cli.add_argument(
        '-lab', default=None,
        help='Lab name, search on Alyx faster. default: None')
    cli.add_argument(
        '--dry', action='store_true', help='Dry run? default: False')
    opts = cli.parse_args()
    purge_local_data(opts.folder, opts.file, lab=opts.lab, dry=opts.dry)
    print('Done\n')