Source code for fleche.query

from dataclasses import dataclass
from typing import Iterable, Iterator, Any

import pandas as pd

from . import call


@dataclass(frozen=True)
class QueryIterator(Iterable[call.LazyCall]):
    """Iterable wrapper adding convenience accessors to the calls of a query result.

    Args:
        calls (iterable of call.LazyCall): underlying results of the query
    """

    calls: Iterable[call.LazyCall]

    def __iter__(self) -> Iterator[call.LazyCall]:
        """Yield the underlying calls one by one."""
        yield from self.calls

    def table(self, arguments: Iterable[str] = (), results=False) -> pd.DataFrame:
        """Return a pandas DataFrame summarizing queried calls.

        Arguments and results are elided unless explicitly requested.

        The DataFrame index is the string form of each call's lookup key.
        Columns are:
            - `name`: the function name
            - `module`: the module name
            - `result`: only if the `results` argument is `True`
            - one column per requested argument
            - metadata fields, flattened and added as columns directly

        If a requested argument name collides with any of the above columns
        (including a flattened metadata field or an argument listed earlier),
        the argument column is prefixed by 'a_'.  Only requested arguments are
        read from each call.

        Metadata fields are written last, so a metadata field that shares its
        name with `name`, `module` or `result` replaces that column's value.

        Args:
            arguments (iterable of str): add the given arguments (of the queried calls) as columns to the table
            results (bool): if True, add results of queried calls to table

        Returns:
            :class:`pandas.DataFrame`: table of all queried calls
        """
        # Materialize once: `arguments` may be a one-shot iterator, but it is
        # walked again for every call.
        arguments = tuple(arguments)
        rows: dict[str, dict[str, Any]] = {}
        for c in self.calls:
            # Flatten metadata up front so its field names take part in the
            # collision check below; dict-valued entries are merged, anything
            # else is ignored.
            flat_metadata: dict[str, Any] = {}
            for data in (c.metadata or {}).values():
                if isinstance(data, dict):
                    flat_metadata.update(data)

            row: dict[str, Any] = {"name": c.name, "module": c.module}
            if results:
                row["result"] = c.result

            for a in arguments:
                # TODO: quick and easy strategy to avoid name clashes, alternative would be to use multicolumns, but
                # those are a bit annoying
                # "metadata" stays reserved so an argument of that name keeps
                # mapping to "a_metadata".
                clashes = a in row or a in flat_metadata or a == "metadata"
                column = f"a_{a}" if clashes else a
                row[column] = c.arguments.get(a, None)

            # Merged last so metadata columns follow the argument columns.
            row.update(flat_metadata)
            rows[str(c.to_lookup_key())] = row
        return pd.DataFrame.from_dict(rows, orient="index")

    def results(self) -> Iterator[Any]:
        """Return an iterator over the results of queried calls."""
        for c in self.calls:
            yield c.result