fix: Deduplicate content list before processing

This commit is contained in:
sigmasternchen 2025-01-12 22:14:00 +01:00
parent fe7039750d
commit 78f0e22507

View file

@@ -25,6 +25,18 @@ def handle_file(filename):
    return results
def deduplicate(candidates):
    """Return *candidates* with duplicates removed, keeping first occurrences.

    Two entries are considered duplicates when they share the same
    "relative_filename" value; original order is preserved.
    """
    # dict preserves insertion order; setdefault keeps the first entry
    # seen for each relative_filename and ignores later duplicates.
    unique = {}
    for entry in candidates:
        unique.setdefault(entry["relative_filename"], entry)
    return list(unique.values())
def recursively_read_files(context):
    data = []
@@ -33,6 +45,8 @@ def recursively_read_files(context):
    for filename in context["filenames"]:
        data.extend(for_each_glob(filename, handle_file))
    data = deduplicate(data)
    logger.info(f"Read %d files in total.", len(data))
    return data