import json
import os

import numpy as np
import pandas as pd

# Root directory that is searched recursively for detection JSON files.
# For a local run, point this at os.getcwd() instead.
rtdir = '/srv/ftp/hummingbird/2021/07/21'

# Collect the path of every *.json file below rtdir.
files_read = list()
for cdir, _, files in os.walk(rtdir):
    for f in files:
        if f.endswith('.json'):
            files_read.append(os.path.join(cdir, f))

# Parsed JSON content of each file, keyed by file path.
fcontents = dict()
for f in files_read:
    with open(f, encoding='utf-8') as ff:
        fcontents[f] = json.load(ff)


def compute_2d_scores(cbbs):
    """Measure how spread out the first detection of each entry is.

    For every entry in ``cbbs`` that has at least one score, the first box
    and the first score are used.  The box centre is the mean of the
    even-indexed box values (treated as x) and the mean of the odd-indexed
    box values (treated as y).

    Args:
        cbbs: Iterable of mappings, each with a ``'boxes'`` sequence and a
            ``'scores'`` sequence.  Entries with no scores are skipped.

    Returns:
        A ``(stddev, scores)`` tuple.  ``stddev`` is the score-weighted mean
        Euclidean distance of the box centres from their score-weighted
        centroid (a spread measure, not a statistical standard deviation);
        it is NaN when no entry has a score or the scores sum to zero.
        ``scores`` is the list of first scores that were used.
    """
    locs = list()
    scores = list()
    for cbb in cbbs:
        if not cbb['scores']:
            continue
        first_box = cbb['boxes'][0]
        locs.append([np.mean(first_box[0::2]), np.mean(first_box[1::2])])
        scores.append(cbb['scores'][0])

    weights = np.asarray(scores, dtype=float)
    # np.average raises ZeroDivisionError when the weights are empty or sum
    # to zero; report an undefined spread instead of crashing.
    if weights.sum() == 0:
        return float('nan'), scores

    points = np.asarray(locs, dtype=float)
    center = np.average(points, weights=weights, axis=0)
    distances = np.linalg.norm(center - points, axis=1)
    stddev = np.average(distances, weights=weights)
    return stddev, scores


# One row per non-empty file: its path, the spread of its detections and the
# mean of the scores that went into that spread.
data = {'file': list(), 'stddev': list(), 'weights': list()}
for fname, contents in fcontents.items():
    if not contents:
        continue
    sdev, sc = compute_2d_scores(contents)
    data['file'].append(fname)
    data['stddev'].append(sdev)
    # A file without any scored entry has no mean score; record NaN so the
    # row is kept but never passes the threshold filter below.
    data['weights'].append(np.average(sc) if sc else float('nan'))

df = pd.DataFrame(data)

# Files whose detections are confident on average yet widely scattered.
fil = df.loc[(df['weights'] > 0.4) & (df['stddev'] > 400)]