PyMenuSite/.env/lib/python3.10/site-packages/tinydb/table.py

751 lines
25 KiB
Python
Raw Normal View History

2023-02-11 08:37:10 -05:00
"""
This module implements tables, the central place for accessing and manipulating
data in TinyDB.
"""
from typing import (
Callable,
Dict,
Iterable,
Iterator,
List,
Mapping,
Optional,
Union,
cast,
Tuple
)
from .queries import QueryLike
from .storages import Storage
from .utils import LRUCache
__all__ = ('Document', 'Table')
class Document(dict):
"""
A document stored in the database.
This class provides a way to access both a document's content and
its ID using ``doc.doc_id``.
"""
def __init__(self, value: Mapping, doc_id: int):
super().__init__(value)
self.doc_id = doc_id
class Table:
"""
Represents a single TinyDB table.
It provides methods for accessing and manipulating documents.
.. admonition:: Query Cache
As an optimization, a query cache is implemented using a
:class:`~tinydb.utils.LRUCache`. This class mimics the interface of
a normal ``dict``, but starts to remove the least-recently used entries
once a threshold is reached.
The query cache is updated on every search operation. When writing
data, the whole cache is discarded as the query results may have
changed.
.. admonition:: Customization
For customization, the following class variables can be set:
- ``document_class`` defines the class that is used to represent
documents,
- ``document_id_class`` defines the class that is used to represent
document IDs,
- ``query_cache_class`` defines the class that is used for the query
cache
- ``default_query_cache_capacity`` defines the default capacity of
the query cache
.. versionadded:: 4.0
:param storage: The storage instance to use for this table
:param name: The table name
:param cache_size: Maximum capacity of query cache
"""
#: The class used to represent documents
#:
#: .. versionadded:: 4.0
document_class = Document
#: The class used to represent a document ID
#:
#: .. versionadded:: 4.0
document_id_class = int
#: The class used for caching query results
#:
#: .. versionadded:: 4.0
query_cache_class = LRUCache
#: The default capacity of the query cache
#:
#: .. versionadded:: 4.0
default_query_cache_capacity = 10
def __init__(
self,
storage: Storage,
name: str,
cache_size: int = default_query_cache_capacity
):
"""
Create a table instance.
"""
self._storage = storage
self._name = name
self._query_cache: LRUCache[QueryLike, List[Document]] \
= self.query_cache_class(capacity=cache_size)
self._next_id = None
def __repr__(self):
args = [
'name={!r}'.format(self.name),
'total={}'.format(len(self)),
'storage={}'.format(self._storage),
]
return '<{} {}>'.format(type(self).__name__, ', '.join(args))
@property
def name(self) -> str:
"""
Get the table name.
"""
return self._name
@property
def storage(self) -> Storage:
"""
Get the table storage instance.
"""
return self._storage
def insert(self, document: Mapping) -> int:
"""
Insert a new document into the table.
:param document: the document to insert
:returns: the inserted document's ID
"""
# Make sure the document implements the ``Mapping`` interface
if not isinstance(document, Mapping):
raise ValueError('Document is not a Mapping')
# First, we get the document ID for the new document
if isinstance(document, Document):
# For a `Document` object we use the specified ID
doc_id = document.doc_id
# We also reset the stored next ID so the next insert won't
# re-use document IDs by accident when storing an old value
self._next_id = None
else:
# In all other cases we use the next free ID
doc_id = self._get_next_id()
# Now, we update the table and add the document
def updater(table: dict):
if doc_id in table:
raise ValueError(f'Document with ID {str(doc_id)} '
f'already exists')
# By calling ``dict(document)`` we convert the data we got to a
# ``dict`` instance even if it was a different class that
# implemented the ``Mapping`` interface
table[doc_id] = dict(document)
# See below for details on ``Table._update``
self._update_table(updater)
return doc_id
def insert_multiple(self, documents: Iterable[Mapping]) -> List[int]:
"""
Insert multiple documents into the table.
:param documents: an Iterable of documents to insert
:returns: a list containing the inserted documents' IDs
"""
doc_ids = []
def updater(table: dict):
for document in documents:
# Make sure the document implements the ``Mapping`` interface
if not isinstance(document, Mapping):
raise ValueError('Document is not a Mapping')
if isinstance(document, Document):
# Check if document does not override an existing document
if document.doc_id in table:
raise ValueError(
f'Document with ID {str(document.doc_id)} '
f'already exists'
)
# Store the doc_id, so we can return all document IDs
# later. Then save the document with its doc_id and
# skip the rest of the current loop
doc_id = document.doc_id
doc_ids.append(doc_id)
table[doc_id] = dict(document)
continue
# Generate new document ID for this document
# Store the doc_id, so we can return all document IDs
# later, then save the document with the new doc_id
doc_id = self._get_next_id()
doc_ids.append(doc_id)
table[doc_id] = dict(document)
# See below for details on ``Table._update``
self._update_table(updater)
return doc_ids
def all(self) -> List[Document]:
"""
Get all documents stored in the table.
:returns: a list with all documents.
"""
# iter(self) (implemented in Table.__iter__ provides an iterator
# that returns all documents in this table. We use it to get a list
# of all documents by using the ``list`` constructor to perform the
# conversion.
return list(iter(self))
def search(self, cond: QueryLike) -> List[Document]:
"""
Search for all documents matching a 'where' cond.
:param cond: the condition to check against
:returns: list of matching documents
"""
# First, we check the query cache to see if it has results for this
# query
cached_results = self._query_cache.get(cond)
if cached_results is not None:
return cached_results[:]
# Perform the search by applying the query to all documents.
# Then, only if the document matches the query, convert it
# to the document class and document ID class.
docs = [
self.document_class(doc, self.document_id_class(doc_id))
for doc_id, doc in self._read_table().items()
if cond(doc)
]
# Only cache cacheable queries.
#
# This weird `getattr` dance is needed to make MyPy happy as
# it doesn't know that a query might have a `is_cacheable` method
# that is not declared in the `QueryLike` protocol due to it being
# optional.
# See: https://github.com/python/mypy/issues/1424
#
# Note also that by default we expect custom query objects to be
# cacheable (which means they need to have a stable hash value).
# This is to keep consistency with TinyDB's behavior before
# `is_cacheable` was introduced which assumed that all queries
# are cacheable.
is_cacheable: Callable[[], bool] = getattr(cond, 'is_cacheable',
lambda: True)
if is_cacheable():
# Update the query cache
self._query_cache[cond] = docs[:]
return docs
def get(
self,
cond: Optional[QueryLike] = None,
doc_id: Optional[int] = None,
) -> Optional[Document]:
"""
Get exactly one document specified by a query or a document ID.
Returns ``None`` if the document doesn't exist.
:param cond: the condition to check against
:param doc_id: the document's ID
:returns: the document or ``None``
"""
if doc_id is not None:
# Retrieve a document specified by its ID
table = self._read_table()
raw_doc = table.get(str(doc_id), None)
if raw_doc is None:
return None
# Convert the raw data to the document class
return self.document_class(raw_doc, doc_id)
elif cond is not None:
# Find a document specified by a query
# The trailing underscore in doc_id_ is needed so MyPy
# doesn't think that `doc_id_` (which is a string) needs
# to have the same type as `doc_id` which is this function's
# parameter and is an optional `int`.
for doc_id_, doc in self._read_table().items():
if cond(doc):
return self.document_class(
doc,
self.document_id_class(doc_id_)
)
return None
raise RuntimeError('You have to pass either cond or doc_id')
def contains(
self,
cond: Optional[QueryLike] = None,
doc_id: Optional[int] = None
) -> bool:
"""
Check whether the database contains a document matching a query or
an ID.
If ``doc_id`` is set, it checks if the db contains the specified ID.
:param cond: the condition use
:param doc_id: the document ID to look for
"""
if doc_id is not None:
# Documents specified by ID
return self.get(doc_id=doc_id) is not None
elif cond is not None:
# Document specified by condition
return self.get(cond) is not None
raise RuntimeError('You have to pass either cond or doc_id')
def update(
self,
fields: Union[Mapping, Callable[[Mapping], None]],
cond: Optional[QueryLike] = None,
doc_ids: Optional[Iterable[int]] = None,
) -> List[int]:
"""
Update all matching documents to have a given set of fields.
:param fields: the fields that the matching documents will have
or a method that will update the documents
:param cond: which documents to update
:param doc_ids: a list of document IDs
:returns: a list containing the updated document's ID
"""
# Define the function that will perform the update
if callable(fields):
def perform_update(table, doc_id):
# Update documents by calling the update function provided by
# the user
fields(table[doc_id])
else:
def perform_update(table, doc_id):
# Update documents by setting all fields from the provided data
table[doc_id].update(fields)
if doc_ids is not None:
# Perform the update operation for documents specified by a list
# of document IDs
updated_ids = list(doc_ids)
def updater(table: dict):
# Call the processing callback with all document IDs
for doc_id in updated_ids:
perform_update(table, doc_id)
# Perform the update operation (see _update_table for details)
self._update_table(updater)
return updated_ids
elif cond is not None:
# Perform the update operation for documents specified by a query
# Collect affected doc_ids
updated_ids = []
def updater(table: dict):
_cond = cast(QueryLike, cond)
# We need to convert the keys iterator to a list because
# we may remove entries from the ``table`` dict during
# iteration and doing this without the list conversion would
# result in an exception (RuntimeError: dictionary changed size
# during iteration)
for doc_id in list(table.keys()):
# Pass through all documents to find documents matching the
# query. Call the processing callback with the document ID
if _cond(table[doc_id]):
# Add ID to list of updated documents
updated_ids.append(doc_id)
# Perform the update (see above)
perform_update(table, doc_id)
# Perform the update operation (see _update_table for details)
self._update_table(updater)
return updated_ids
else:
# Update all documents unconditionally
updated_ids = []
def updater(table: dict):
# Process all documents
for doc_id in list(table.keys()):
# Add ID to list of updated documents
updated_ids.append(doc_id)
# Perform the update (see above)
perform_update(table, doc_id)
# Perform the update operation (see _update_table for details)
self._update_table(updater)
return updated_ids
def update_multiple(
self,
updates: Iterable[
Tuple[Union[Mapping, Callable[[Mapping], None]], QueryLike]
],
) -> List[int]:
"""
Update all matching documents to have a given set of fields.
:returns: a list containing the updated document's ID
"""
# Define the function that will perform the update
def perform_update(fields, table, doc_id):
if callable(fields):
# Update documents by calling the update function provided
# by the user
fields(table[doc_id])
else:
# Update documents by setting all fields from the provided
# data
table[doc_id].update(fields)
# Perform the update operation for documents specified by a query
# Collect affected doc_ids
updated_ids = []
def updater(table: dict):
# We need to convert the keys iterator to a list because
# we may remove entries from the ``table`` dict during
# iteration and doing this without the list conversion would
# result in an exception (RuntimeError: dictionary changed size
# during iteration)
for doc_id in list(table.keys()):
for fields, cond in updates:
_cond = cast(QueryLike, cond)
# Pass through all documents to find documents matching the
# query. Call the processing callback with the document ID
if _cond(table[doc_id]):
# Add ID to list of updated documents
updated_ids.append(doc_id)
# Perform the update (see above)
perform_update(fields, table, doc_id)
# Perform the update operation (see _update_table for details)
self._update_table(updater)
return updated_ids
def upsert(self, document: Mapping, cond: Optional[QueryLike] = None) -> List[int]:
"""
Update documents, if they exist, insert them otherwise.
Note: This will update *all* documents matching the query. Document
argument can be a tinydb.table.Document object if you want to specify a
doc_id.
:param document: the document to insert or the fields to update
:param cond: which document to look for, optional if you've passed a
Document with a doc_id
:returns: a list containing the updated documents' IDs
"""
# Extract doc_id
if isinstance(document, Document) and hasattr(document, 'doc_id'):
doc_ids: Optional[List[int]] = [document.doc_id]
else:
doc_ids = None
# Make sure we can actually find a matching document
if doc_ids is None and cond is None:
raise ValueError("If you don't specify a search query, you must "
"specify a doc_id. Hint: use a table.Document "
"object.")
# Perform the update operation
try:
updated_docs: Optional[List[int]] = self.update(document, cond, doc_ids)
except KeyError:
# This happens when a doc_id is specified, but it's missing
updated_docs = None
# If documents have been updated: return their IDs
if updated_docs:
return updated_docs
# There are no documents that match the specified query -> insert the
# data as a new document
return [self.insert(document)]
def remove(
self,
cond: Optional[QueryLike] = None,
doc_ids: Optional[Iterable[int]] = None,
) -> List[int]:
"""
Remove all matching documents.
:param cond: the condition to check against
:param doc_ids: a list of document IDs
:returns: a list containing the removed documents' ID
"""
if doc_ids is not None:
# This function returns the list of IDs for the documents that have
# been removed. When removing documents identified by a set of
# document IDs, it's this list of document IDs we need to return
# later.
# We convert the document ID iterator into a list, so we can both
# use the document IDs to remove the specified documents and
# to return the list of affected document IDs
removed_ids = list(doc_ids)
def updater(table: dict):
for doc_id in removed_ids:
table.pop(doc_id)
# Perform the remove operation
self._update_table(updater)
return removed_ids
if cond is not None:
removed_ids = []
# This updater function will be called with the table data
# as its first argument. See ``Table._update`` for details on this
# operation
def updater(table: dict):
# We need to convince MyPy (the static type checker) that
# the ``cond is not None`` invariant still holds true when
# the updater function is called
_cond = cast(QueryLike, cond)
# We need to convert the keys iterator to a list because
# we may remove entries from the ``table`` dict during
# iteration and doing this without the list conversion would
# result in an exception (RuntimeError: dictionary changed size
# during iteration)
for doc_id in list(table.keys()):
if _cond(table[doc_id]):
# Add document ID to list of removed document IDs
removed_ids.append(doc_id)
# Remove document from the table
table.pop(doc_id)
# Perform the remove operation
self._update_table(updater)
return removed_ids
raise RuntimeError('Use truncate() to remove all documents')
def truncate(self) -> None:
"""
Truncate the table by removing all documents.
"""
# Update the table by resetting all data
self._update_table(lambda table: table.clear())
# Reset document ID counter
self._next_id = None
def count(self, cond: QueryLike) -> int:
"""
Count the documents matching a query.
:param cond: the condition use
"""
return len(self.search(cond))
def clear_cache(self) -> None:
"""
Clear the query cache.
"""
self._query_cache.clear()
def __len__(self):
"""
Count the total number of documents in this table.
"""
return len(self._read_table())
def __iter__(self) -> Iterator[Document]:
"""
Iterate over all documents stored in the table.
:returns: an iterator over all documents.
"""
# Iterate all documents and their IDs
for doc_id, doc in self._read_table().items():
# Convert documents to the document class
yield self.document_class(doc, self.document_id_class(doc_id))
def _get_next_id(self):
"""
Return the ID for a newly inserted document.
"""
# If we already know the next ID
if self._next_id is not None:
next_id = self._next_id
self._next_id = next_id + 1
return next_id
# Determine the next document ID by finding out the max ID value
# of the current table documents
# Read the table documents
table = self._read_table()
# If the table is empty, set the initial ID
if not table:
next_id = 1
self._next_id = next_id + 1
return next_id
# Determine the next ID based on the maximum ID that's currently in use
max_id = max(self.document_id_class(i) for i in table.keys())
next_id = max_id + 1
# The next ID we will return AFTER this call needs to be larger than
# the current next ID we calculated
self._next_id = next_id + 1
return next_id
def _read_table(self) -> Dict[str, Mapping]:
"""
Read the table data from the underlying storage.
Documents and doc_ids are NOT yet transformed, as
we may not want to convert *all* documents when returning
only one document for example.
"""
# Retrieve the tables from the storage
tables = self._storage.read()
if tables is None:
# The database is empty
return {}
# Retrieve the current table's data
try:
table = tables[self.name]
except KeyError:
# The table does not exist yet, so it is empty
return {}
return table
def _update_table(self, updater: Callable[[Dict[int, Mapping]], None]):
"""
Perform a table update operation.
The storage interface used by TinyDB only allows to read/write the
complete database data, but not modifying only portions of it. Thus,
to only update portions of the table data, we first perform a read
operation, perform the update on the table data and then write
the updated data back to the storage.
As a further optimization, we don't convert the documents into the
document class, as the table data will *not* be returned to the user.
"""
tables = self._storage.read()
if tables is None:
# The database is empty
tables = {}
try:
raw_table = tables[self.name]
except KeyError:
# The table does not exist yet, so it is empty
raw_table = {}
# Convert the document IDs to the document ID class.
# This is required as the rest of TinyDB expects the document IDs
# to be an instance of ``self.document_id_class`` but the storage
# might convert dict keys to strings.
table = {
self.document_id_class(doc_id): doc
for doc_id, doc in raw_table.items()
}
# Perform the table update operation
updater(table)
# Convert the document IDs back to strings.
# This is required as some storages (most notably the JSON file format)
# don't support IDs other than strings.
tables[self.name] = {
str(doc_id): doc
for doc_id, doc in table.items()
}
# Write the newly updated data back to the storage
self._storage.write(tables)
# Clear the query cache, as the table contents have changed
self.clear_cache()