mirror of
https://github.com/sigmasternchen/grimoire
synced 2025-03-15 08:08:55 +00:00
fix: Deduplicate content list before processing
This commit is contained in:
parent
fe7039750d
commit
78f0e22507
1 changed file with 14 additions and 0 deletions
|
@ -25,6 +25,18 @@ def handle_file(filename):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def deduplicate(candidates):
    """Return the candidates with repeated ``relative_filename`` values removed.

    Only the first candidate seen for each ``relative_filename`` is kept, and
    the survivors are returned in the order in which they first appeared.

    Args:
        candidates: Iterable of subscriptable entries, each providing a
            hashable ``"relative_filename"`` item.

    Returns:
        A new list holding the first entry for every distinct
        ``relative_filename``. The input is not modified.
    """
    # A dict keeps insertion order, and setdefault() never overwrites an
    # existing key, so the first entry per filename wins.
    first_by_name = {}
    for entry in candidates:
        first_by_name.setdefault(entry["relative_filename"], entry)

    return list(first_by_name.values())
|
||||||
|
|
||||||
|
|
||||||
def recursively_read_files(context):
|
def recursively_read_files(context):
|
||||||
data = []
|
data = []
|
||||||
|
|
||||||
|
@ -33,6 +45,8 @@ def recursively_read_files(context):
|
||||||
for filename in context["filenames"]:
|
for filename in context["filenames"]:
|
||||||
data.extend(for_each_glob(filename, handle_file))
|
data.extend(for_each_glob(filename, handle_file))
|
||||||
|
|
||||||
|
data = deduplicate(data)
|
||||||
|
|
||||||
logger.info(f"Read %d files in total.", len(data))
|
logger.info(f"Read %d files in total.", len(data))
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
Loading…
Reference in a new issue