Commit d0e1bb33 authored by Stefano Zacchiroli's avatar Stefano Zacchiroli

New upstream version 2.1.0+hg20180919

parent caaff1ed
repo: 26cf45c5599ff39ae649d05d52e820039256157e
node: 9f47c3efad9a83b16ebbb8e1e20cb3170dfe6683
node: 5a6ad02c213565a81a51d8a6d8e05fae18cefd83
branch: default
latesttag: 2.0.0
latesttagdistance: 59
changessincelatesttag: 82
latesttag: 2.1.2
latesttagdistance: 80
changessincelatesttag: 104
......@@ -15,3 +15,4 @@ beancount/parser/grammar.output
.idea
beancount.egg-info
.cache
.pytest_cache/*
......@@ -4,3 +4,6 @@ da62959c106a3f06ffc4a1fcdb5e6ae97e77152d master
6d69f09be1d4ac9365432b2a8a4bc130b521ccda github/master
a85679977ede1586585ae0b92cd121e1b8835481 github/master
8af008648277cd9bf2a6699d6db550ad12d88453 2.0.0
e291c91b37e1d21c29645d93473a7917bb726699 2.1.0
c36d4fec6823ddbfbabbfde429f01175cdb4d661 2.1.1
2b20d301782528147e0aba744da9a014323ef630 2.1.2
This diff is collapsed.
Martin Blais (blais at furius dot ca)
Martin Blais (original author).
Original author of Beancount v1 and v2.
Daniel Clemente (n142857 at gmail dot com)
Was an early and patient user of an incomplete Beancount v2, reported a large
number of bugs and discussed many of the design ideas from the perspective of
a user. This helped motivate the completion and polishing of this new version,
as well as understand what aspects needed more explanations and documentation.
Contributors (in alphabetical order):
- Adrián Medraño Calvo (support for UTF-8)
- Alex Johnstone (bug fix)
- Alok Parlikar (bug fix)
- Christoph Sarnowski (bug fix)
- Daniel Clemente (early adoption and tickets)
- Dominik Aumayr (web-related bug fixes, static documentation, author of Fava)
- Ethan Glasser-Camp (improvements to grammar)
- Felix Eckhofer (bug fix)
- Hugo Ideler (bug fix, price source)
- Jakob Schnitzer (bug fixes in plugins and SQL, co-author/maintainer of Fava)
- Jason Chu (bug fixes, plugins, lots of comments)
- Jeff Brantley (windows installation improvements)
- Johannes Harms (bug fixes, forecast plugin improvement)
- Mark Hansen (docs)
- Markus Teufelberger (bug fix / testing)
- Martin Michlmayr (numerous tickets, CI setup, bug fixes and improvements)
- Michael Droogleever (importer-related improvements)
- Mikhail Gusarov (parser / improvement for short account names)
- Patrick Ruckstuhl (bug fix importers)
- Robert Sesek (bug fix)
- Zhuoyun Wei (importers improvements)
- dave_stephens (web-related improvement)
- rkhwaja (windows installation improvement)
- Сергей Трофимов (bug fix, parser)
And all the other users on the mailing-list asking important questions, making
useful suggestions and engaging in productive discussions. Thank you!
......@@ -5,9 +5,9 @@ INPUT = $(HOME)/q/office/accounting/blais.beancount
DOWNLOADS = $(HOME)/u/Downloads
GREP="grep --include="*.py" -srnE"
TOOLS=./etc
TOOLS=./tools
PYTHON=python3
PYTHON?=python3
all: build
......@@ -24,7 +24,12 @@ clean:
# Targets to generate and compile the C parser.
CROOT = beancount/parser
# See
# https://www.owlfolio.org/possibly-useful/flex-input-scanner-rules-are-too-complicated/
#LEX = flex -Ca
LEX = flex
YACC = bison --report=itemset --verbose
FILTERYACC = sed -e 's@/\*[ \t]yacc\.c:.*\*/@@'
TMP=/tmp
......@@ -34,9 +39,24 @@ $(CROOT)/grammar.c $(CROOT)/grammar.h: $(CROOT)/grammar.y
(cat $(CROOT)/grammar.c | $(FILTERYACC) > $(TMP)/grammar.c ; mv $(TMP)/grammar.c $(CROOT)/grammar.c )
(cat $(CROOT)/grammar.h | $(FILTERYACC) > $(TMP)/grammar.h ; mv $(TMP)/grammar.h $(CROOT)/grammar.h )
UNICODE_CATEGORY_RANGES_GENERATOR=$(TOOLS)/generate_unicode_category_regexps.py
UNICODE_CATEGORY_DIR = $(CROOT)/lexer
UNICODE_CATEGORIES = Lu Ll Lt Lo Nd Nl No
UNICODE_CATEGORY_SOURCES = $(patsubst %, $(UNICODE_CATEGORY_DIR)/%.l, $(UNICODE_CATEGORIES))
$(UNICODE_CATEGORY_SOURCES): $(UNICODE_CATEGORY_DIR)/%.l :
$(PYTHON) $(UNICODE_CATEGORY_RANGES_GENERATOR) \
--format=lex --name=UTF-8-$* --categories=$* >$@
# Note that flex parses the files in the given order.
#LEXER_SOURCES = $(UNICODE_CATEGORY_SOURCES) $(CROOT)/lexer.l
#$(CROOT)/lexer.c $(CROOT)/lexer.h: $(LEXER_SOURCES) $(CROOT)/grammar.h
# $(LEX) --outfile=$(CROOT)/lexer.c --header-file=$(CROOT)/lexer.h $(LEXER_SOURCES)
# patch -p1 < $(CROOT)/lexer.patch
FLEX_VERSION=$(shell $(LEX) -V)
$(CROOT)/lexer.c $(CROOT)/lexer.h: $(CROOT)/lexer.l $(CROOT)/grammar.h
$(LEX) --outfile=$(CROOT)/lexer.c --header-file=$(CROOT)/lexer.h $<
SOURCES = \
$(CROOT)/lexer.c \
$(CROOT)/lexer.h \
......@@ -136,20 +156,17 @@ release:
$(PYTHON) setup.py register sdist upload
# Run the unittests.
NOSE ?= nosetests3
vtest vtests verbose-test verbose-tests:
$(NOSE) -v -s beancount
$(PYTHON) -m pytest -v -s beancount examples
qtest qtests quiet-test quiet-tests test tests:
$(NOSE) beancount
$(PYTHON) -m pytest beancount
test-failed:
$(NOSE) --failed beancount
test-last test-last-failed test-failed:
$(PYTHON) -m pytest --last-failed beancount
nakedtests:
PATH=/bin:/usr/bin PYTHONPATH= /usr/local/bin/$(NOSE) -x beancount
test-naked:
PATH=/bin:/usr/bin PYTHONPATH= $(PYTHON) -m pytest -x beancount
# Run the parser and measure its performance.
.PHONY: check
......@@ -228,6 +245,10 @@ PYLINT = python3 $(shell which pylint)
pylint lint:
$(PYLINT) --rcfile=$(PWD)/etc/pylintrc $(LINT_SRCS)
LINT_TESTS=useless-suppression,empty-docstring
pylint-only:
$(PYLINT) --rcfile=$(PWD)/etc/pylintrc --disable=all --enable=$(LINT_TESTS) $(LINT_SRCS)
pyflakes:
pyflakes $(LINT_SRCS)
......
......@@ -4672,6 +4672,64 @@ http://furius.ca/beancount/doc/proposal-query
it down. Every couple of years I clean this mess up and put it in the sections
above.
- Three interesting ideas for extending booking further here:
https://groups.google.com/d/msgid/beancount/9c9dcc5f-72bf-44b2-aaf9-4e0c07cbff77%40googlegroups.com?utm_medium=email&utm_source=footer
1. When you apply the partial booking specification, it could be applied
against the booked legs of the transaction in order to select cost bases.
I'm not sure if this generalizes.
"Finally, the partial specification, say, just specifying the date, is
used to narrow down the lot against the list of possible lots in the
Inventory of the account before applying the transaction, but NOT against
the list of other postings. That would be an interesting power to add to
the booking system, as it could disambiguate this case."
2. One could assume a single currency group in each transaction when there's
at least one posting at cost.
3. One could also assume that in that account a commodity is never priced in
terms of two different currencies.
- Allow plugins to run before and after booking; this would make it possible
for plugins to run on CostSpec and fill in more information. In fact, maybe
the booking process itself could be moved to a plugin. This could be a
powerful idea, in that it would clarify the distinction between the two
streams of transactions.
- Add support for this data source: https://www.alphavantage.co/documentation/
- Sometimes it might be useful to end the stream of useful transactions at a
particular point in the input file. Normally this is best done by filtering
in the queries themselves, but there are times where having an End
directives specified temporarily in the input file would have been useful
(e.g. when traveling and running temporary balances while editing the input
file). Consider adding this facility, it might be a convenience, especially
to get around some of the absence of time issues.
- In the new SQL shell, allow the plugins to define new subsets of postings
(transactions), so that they can be queried separately. For instance, when
I'm traveling, it should be possible to query the set of transactions before
or after the split_expenses plugin ran. This would actually have been useful
while traveling, because the split_expenses plugin doubles up a lot of the
postings (that's what it does...).
- Document the "{ <num> # <num> <ccy> }" syntax properly, in a dedicated place.
- New currency sources:
https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/index.en.html
https://news.ycombinator.com/item?id=15616880
- Idea: Add "contra" as a new column, when the sign posted in the wrong
direction based on the account type. Or add a boolean "contra" virtual
column (or function, from account name).
- Here's another way to implement an intra-day balance: define a new balance
directive type, whose semantic is to sort all the entries, but the entries
on the same day of the balance are sorted in file order. I think this would
handle the most common use case. Try it (experiment).
- It might be interesting to support some sort of transfer syntax that would
allow the movement of Position's across accounts, without converting to the
cost, something like this:
......
......@@ -11,3 +11,6 @@ __license__ = "GNU GPLv2"
import sys
if (sys.version_info.major, sys.version_info.minor) < (3, 3):
raise ImportError("Python 3.3 or above is required")
__version__ = '2.1.3-devel'
......@@ -11,15 +11,24 @@ import re
import os
from os import path
from beancount.utils import regexp_utils
# Component separator for account names.
# pylint: disable=invalid-name
sep = ':'

# Regular expression strings that match valid account name components.
# Unicode categories used:
#   Lu: Uppercase letters.
#   L:  All letters.
#   Nd: Decimal numbers.
# NOTE(review): the old ASCII-only ACCOUNT_RE definition that preceded these
# was dead code (immediately overwritten below) and has been removed.
ACC_COMP_TYPE_RE = regexp_utils.re_replace_unicode(r"[\p{Lu}][\p{L}\p{Nd}\-]*")
ACC_COMP_NAME_RE = regexp_utils.re_replace_unicode(r"[\p{Lu}\p{Nd}][\p{L}\p{Nd}\-]*")

# Regular expression string that matches a valid account: one type component
# followed by one or more name components, joined by `sep`. {5672c7270e1e}
ACCOUNT_RE = "(?:{})(?:{}{})+".format(ACC_COMP_TYPE_RE, sep, ACC_COMP_NAME_RE)
# A dummy object which stands for the account type. Values in custom directives
......
......@@ -52,7 +52,7 @@ class TmpFilesTestBase(unittest.TestCase):
parent_dir = path.dirname(abs_filename)
if not path.exists(parent_dir):
os.makedirs(parent_dir)
open(abs_filename, 'w')
with open(abs_filename, 'w'): pass
return tempdir, root
......
......@@ -62,7 +62,8 @@ class TestAccountTypes(unittest.TestCase):
def test_is_account_type(self):
    """Check classification of account names against an expected root type.

    Fix: the diff-merge had left a duplicated 'Expenses' assertion (the old
    single-line form alongside the new wrapped form); only one is kept.
    """
    self.assertTrue(account_types.is_account_type("Assets", "Assets:US:RBS:Checking"))
    # A mismatched root type is rejected.
    self.assertFalse(account_types.is_account_type("Expenses",
                                                   "Assets:US:RBS:Checking"))
    # A matching prefix without the component separator is not a match.
    self.assertFalse(account_types.is_account_type("Assets", "AssetsUS:RBS:Checking"))
def test_is_root_account(self):
......
......@@ -23,7 +23,6 @@ from beancount.core.number import D
# Note: This is kept in sync with "beancount/parser/lexer.l".
CURRENCY_RE = r'[A-Z][A-Z0-9\'\.\_\-]{0,22}[A-Z0-9]'
# pylint: disable=invalid-name
_Amount = NamedTuple('_Amount', [
('number', Optional[Decimal]),
('currency', str)])
......@@ -59,8 +58,10 @@ class Amount(_Amount):
Returns:
A formatted string of the quantized amount and symbol.
"""
return "{} {}".format(dformat.format(self.number, self.currency),
self.currency)
number_fmt = (dformat.format(self.number, self.currency)
if isinstance(self.number, Decimal)
else str(self.number))
return "{} {}".format(number_fmt, self.currency)
def __str__(self):
"""Convert an Amount instance to a printable string with the defaults.
......
......@@ -54,7 +54,7 @@ class Booking(enum.Enum):
LIFO = 'LIFO'
def new_directive(clsname, fields: List[Tuple]):
def new_directive(clsname, fields: List[Tuple]) -> NamedTuple:
"""Create a directive class. Do not include default fields.
This should probably be carried out through inheritance.
......@@ -386,6 +386,25 @@ ALL_DIRECTIVES = (
Custom
)
# A Directive is any one of the concrete directive types that can appear in a
# ledger's entry stream.
Directive = Union[Open, Close, Commodity, Pad, Balance, Transaction,
                  Note, Event, Query, Price, Document, Custom]

# An entry stream is simply a list of directives.
Entries = List[Directive]
def new_metadata(filename, lineno, kvlist=None):
"""Create a new metadata container from the filename and line number.
......
......@@ -5,7 +5,6 @@ __license__ = "GNU GPLv2"
import collections
import copy
import warnings
from beancount.core.number import D
from beancount.core.number import Decimal
......@@ -17,7 +16,6 @@ from beancount.core.position import CostSpec
from beancount.core.position import Cost
from beancount.core.inventory import Inventory
from beancount.core import inventory
from beancount.core import position
from beancount.core import convert
from beancount.core.data import Transaction
from beancount.core.data import Posting
......@@ -171,10 +169,10 @@ def infer_tolerances(postings, options_map, use_cost=None):
cost = posting.cost
if cost is not None:
cost_currency = cost.currency
if isinstance(cost, position.Cost):
if isinstance(cost, Cost):
cost_tolerance = min(tolerance * cost.number, MAXIMUM_TOLERANCE)
else:
assert isinstance(cost, position.CostSpec)
assert isinstance(cost, CostSpec)
cost_tolerance = MAXIMUM_TOLERANCE
for cost_number in cost.number_total, cost.number_per:
if cost_number is None or cost_number is MISSING:
......@@ -193,7 +191,7 @@ def infer_tolerances(postings, options_map, use_cost=None):
tolerances[currency] = max(tolerance, tolerances.get(currency, -1024))
default = tolerances.pop('*', ZERO)
return defdict.ImmutableDictWithDefault(default, tolerances)
return defdict.ImmutableDictWithDefault(tolerances, default=default)
# Meta-data field appended to automatically inserted postings.
......
......@@ -588,7 +588,8 @@ class TestInferTolerances(cmptest.TestCase):
class TestQuantize(unittest.TestCase):
def test_quantize_with_tolerance(self):
tolerances = defdict.ImmutableDictWithDefault(D('0.000005'), {'USD': D('0.01')})
tolerances = defdict.ImmutableDictWithDefault({'USD': D('0.01')},
default=D('0.000005'))
self.assertEqual(
D('100.12'),
interpolate.quantize_with_tolerance(tolerances, 'USD', D('100.123123123')))
......@@ -596,7 +597,7 @@ class TestQuantize(unittest.TestCase):
D('100.12312'),
interpolate.quantize_with_tolerance(tolerances, 'CAD', D('100.123123123')))
tolerances = defdict.ImmutableDictWithDefault(ZERO, {'USD': D('0.01')})
tolerances = defdict.ImmutableDictWithDefault({'USD': D('0.01')}, default=ZERO)
self.assertEqual(
D('100.12'),
interpolate.quantize_with_tolerance(tolerances, 'USD', D('100.123123123')))
......
This diff is collapsed.
......@@ -145,6 +145,24 @@ class TestInventory(unittest.TestCase):
inv5 = I('100 JPY, 100 USD')
self.assertEqual(inv4, inv5)
def test_op_lt(self):
    # Same contents listed in a different textual order: neither inventory
    # compares strictly less than the other.
    left = I('100 USD, 100 CAD')
    right = I('100 CAD, 100 USD')
    self.assertFalse(left < right)
    self.assertFalse(right < left)

    # An inventory with a larger USD position compares strictly greater.
    bigger = I('200 USD, 100 CAD')
    self.assertTrue(left < bigger)
    self.assertTrue(right < bigger)
    self.assertFalse(bigger < left)

    # Differing currencies: the USD/CAD inventory compares greater than the
    # USD/JPY ones, regardless of their textual order.
    mixed_a = I('100 USD, 100 JPY')
    self.assertTrue(left > mixed_a)
    mixed_b = I('100 JPY, 100 USD')
    self.assertTrue(left > mixed_b)
    self.assertFalse(mixed_a < mixed_b)
def test_is_small__value(self):
test_inv = I('1.50 JPY, 1.51 USD, 1.52 CAD')
for inv in test_inv, -test_inv:
......@@ -227,6 +245,15 @@ class TestInventory(unittest.TestCase):
inv2 = inv * D('3')
self.assertEqual(I('30 HOOL {1.11 USD}, 6.66 CAD'), inv2)
def test_get_only_position(self):
    # With more than one position, asking for "the only" position must fail.
    multi = I('10 HOOL {1.11 USD}, 2.22 CAD')
    with self.assertRaises(AssertionError):
        multi.get_only_position()
    # With exactly one position, that position is returned.
    single = I('10 HOOL {1.11 USD}')
    self.assertEqual(A('10 HOOL'), single.get_only_position().units)
    # An empty inventory yields None.
    self.assertIsNone(I('').get_only_position())
def test_get_currency_units(self):
inv = I('40.50 JPY, 40.51 USD {1.01 CAD}, 40.52 CAD')
self.assertEqual(inv.get_currency_units('JPY'), A('40.50 JPY'))
......@@ -296,6 +323,10 @@ class TestInventory(unittest.TestCase):
inv = I('2 HOOL {500 USD}, 3 HOOL {520 USD}, 4 HOOL {530 USD}')
self.assertEqual(inv.average(), I('9 HOOL {520 USD}'))
# Test DBZ case
inv = I('2 HOOL {100 USD}, -2 HOOL {102 USD}')
inv.average()
def test_currencies(self):
inv = Inventory()
self.assertEqual(set(), inv.currencies())
......@@ -382,7 +413,7 @@ class TestInventory(unittest.TestCase):
position_, _ = inv.add_amount(A('-12 HOOL'),
Cost(D('700'), 'USD', None, None))
self.assertTrue(inv[0].is_negative_at_cost())
self.assertTrue(next(iter(inv)).is_negative_at_cost())
# Testing the strict case where everything matches, a cost and a lot-date.
inv = Inventory()
......@@ -394,7 +425,7 @@ class TestInventory(unittest.TestCase):
position_, _ = inv.add_amount(A('-12 HOOL'), Cost(D('700'), 'USD',
date(2000, 1, 1), None))
self.assertTrue(inv[0].is_negative_at_cost())
self.assertTrue(next(iter(inv)).is_negative_at_cost())
def test_add_amount__allow_negative(self):
inv = Inventory()
......@@ -410,8 +441,9 @@ class TestInventory(unittest.TestCase):
self.assertIsNone(position_)
# Check for reductions.
self.assertTrue(inv[1].is_negative_at_cost())
self.assertTrue(inv[2].is_negative_at_cost())
invlist = list(inv)
self.assertTrue(invlist[1].is_negative_at_cost())
self.assertTrue(invlist[2].is_negative_at_cost())
inv.add_amount(A('-11 USD'), Cost(D('1.10'), 'CAD', None, None))
inv.add_amount(A('-11 USD'), Cost(D('1.10'), 'CAD', date(2012, 1, 1), None))
self.assertEqual(3, len(inv))
......
......@@ -8,7 +8,6 @@ __license__ = "GNU GPLv2"
import copy
import datetime
import re
import warnings
from typing import NamedTuple, Optional
......
......@@ -85,16 +85,19 @@ class TestCostSpec(unittest.TestCase):
def test_cost_to_str__detail(self):
cost = position.CostSpec(
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b", True)
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b",
True)
self.assertEqual('101.23 # 202.46 USD, 2015-09-06, "f4412439c31b", *',
position.cost_to_str(cost, self.dformat))
cost = position.CostSpec(
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b", False)
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b",
False)
self.assertEqual('101.23 # 202.46 USD, 2015-09-06, "f4412439c31b"',
position.cost_to_str(cost, self.dformat))
cost = position.CostSpec(D('101.23'), None, 'USD', datetime.date(2015, 9, 6), None, True)
cost = position.CostSpec(D('101.23'), None, 'USD', datetime.date(2015, 9, 6),
None, True)
self.assertEqual('101.23 USD, 2015-09-06, *',
position.cost_to_str(cost, self.dformat))
......@@ -120,16 +123,19 @@ class TestCostSpec(unittest.TestCase):
def test_cost_to_str__simple(self):
cost = position.CostSpec(
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b", True)
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b",
True)
self.assertEqual('101.23 # 202.46 USD',
position.cost_to_str(cost, self.dformat, False))
cost = position.CostSpec(
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b", False)
D('101.23'), D('202.46'), 'USD', datetime.date(2015, 9, 6), "f4412439c31b",
False)
self.assertEqual('101.23 # 202.46 USD',
position.cost_to_str(cost, self.dformat, False))
cost = position.CostSpec(D('101.23'), None, 'USD', datetime.date(2015, 9, 6), None, True)
cost = position.CostSpec(D('101.23'), None, 'USD', datetime.date(2015, 9, 6), None,
True)
self.assertEqual('101.23 USD',
position.cost_to_str(cost, self.dformat, False))
......
......@@ -5,5 +5,10 @@ identifying which importer to run on an externally downloaded file, extract
transactions from them and file away these files under a clean and rigidly named
hierarchy for preservation.
"""
__copyright__ = "Copyright (C) 2016 Martin Blais"
__copyright__ = "Copyright (C) 2016,2018 Martin Blais"
__license__ = "GNU GPLv2"
# Register our test helper for rewriting. This has to be done in the parent
# package because it has to run before the first time the module is imported.
import pytest
pytest.register_assert_rewrite('beancount.ingest.regression_pytest')
......@@ -52,8 +52,7 @@ def extract_from_file(filename, importer,
generate Transaction objects with None as value for the 'tags' or 'links'
attributes.
Returns:
A list of new imported entries and a subset of these which have been
identified as possible duplicates.
A list of new imported entries.
Raises:
Exception: If there is an error in the importer's extract() method.
"""
......@@ -69,7 +68,7 @@ def extract_from_file(filename, importer,
kwargs['existing_entries'] = existing_entries
new_entries = importer.extract(file, **kwargs)
if not new_entries:
return [], []
return []
# Make sure the newly imported entries are sorted; don't trust the importer.
new_entries.sort(key=data.entry_sortkey)
......@@ -83,26 +82,39 @@ def extract_from_file(filename, importer,
new_entries = list(itertools.dropwhile(lambda x: x.date < min_date,
new_entries))
# Find potential matching entries.
duplicate_entries = []
if existing_entries is not None:
return new_entries
def find_duplicate_entries(new_entries_list, existing_entries):
    """Flag potentially duplicate entries.

    Fix: the diff-merge had interleaved the old implementation with the new
    one, leaving a duplicated `duplicate_set` assignment, a dead branch that
    appended to an undefined `duplicate_entries` list, and a stray old return
    statement. This is the clean new-version function.

    Args:
      new_entries_list: A list of pairs of (key, lists of imported entries), one
        for each importer. The key identifies the filename and/or importer that
        yielded those new entries.
      existing_entries: A list of previously existing entries from the target
        ledger.
    Returns:
      A list of lists of modified new entries (like new_entries_list),
      potentially with modified metadata to indicate those which are duplicated.
    """
    mod_entries_list = []
    for key, new_entries in new_entries_list:
        # Find similar entries against the existing ledger only.
        duplicate_pairs = similar.find_similar_entries(new_entries, existing_entries)
        duplicate_set = set(id(entry) for entry, _ in duplicate_pairs)

        # Add a metadata marker to the extracted entries for duplicates.
        mod_entries = []
        for entry in new_entries:
            if id(entry) in duplicate_set:
                # Copy before mutating: the entry's meta dict may be shared.
                marked_meta = entry.meta.copy()
                marked_meta[DUPLICATE_META] = True
                entry = entry._replace(meta=marked_meta)
            mod_entries.append(entry)
        mod_entries_list.append((key, mod_entries))
    return mod_entries_list
def print_extracted_entries(importer, entries, file):
......@@ -161,34 +173,52 @@ def extract(importer_config,
allow_none_for_tags_and_links = (
options_map and options_map["allow_deprecated_none_for_tags_and_links"])
output.write(HEADER)
# Run all the importers and gather their result sets.
new_entries_list = []
for filename, importers in identify.find_imports(importer_config,
files_or_directories,
output):
files_or_directories):
for importer in importers:
# Import and process the file.
try:
new_entries, duplicate_entries = extract_from_file(
new_entries = extract_from_file(
filename,
importer,
existing_entries=entries,
min_date=mindate,
allow_none_for_tags_and_links=allow_none_for_tags_and_links)
new_entries_list.append((filename, new_entries))
except Exception as exc:
logging.error("Importer %s.extract() raised an unexpected error: %s",
importer.name(), exc)
logging.error("Traceback: %s", traceback.format_exc())
continue
if not new_entries and not duplicate_entries:
continue
if not ascending:
new_entries.reverse()
print_extracted_entries(importer, new_entries, output)
# Find potential duplicate entries in the result sets, either against the
# list of existing ones, or against each other. A single call to this
# function is made on purpose, so that the function be able to merge
# entries.
new_entries_list = find_duplicate_entries(
new_entries_list, entries)
assert isinstance(new_entries_list, list)
assert all(isinstance(new_entries, tuple) for new_entries in new_entries_list)
assert all(isinstance(new_entries[0], str) for new_entries in new_entries_list)
assert all(isinstance(new_entries[1], list) for new_entries in new_entries_list)
# Print out the results.
output.write(HEADER)