meterviewer.datasets.read.single 源代码

"""
read data from disk.

处理单个数字数据集的函数,数据集格式为 `dataset_name/[0-9]`
"""

from __future__ import annotations

import pathlib
import random
import typing as t

from loguru import logger
from matplotlib import pyplot as plt

from meterviewer import T, files, func
from meterviewer.img import process

from ..dataset import get_dataset_path


def path_fusion(
    root: pathlib.Path,
    dataset_name: str,
    num: int,
):
    """Build the path under which one digit's images of a dataset live.

    Args:
        root: Root directory path.
        dataset_name: Name of the dataset.
        num: Digit (0-9).

    Returns:
        ``<dataset path>/Digit/<num>``, where the dataset path is whatever
        ``get_dataset_path(root, dataset_name)`` returns.
    """
    dataset_dir = get_dataset_path(root, dataset_name)
    return dataset_dir / "Digit" / str(num)
def read_rand_img(
    root: pathlib.Path,
    get_dataset: t.Callable[[], t.Union[str, pathlib.Path]],
    digit: t.Union[int, str],
    promise=False,
) -> T.NpImage:
    """Load one randomly chosen image of a single digit.

    Args:
        root: Root directory path.
        get_dataset: Callable returning the dataset name.
        digit: Digit to read, or ``"x"`` to request a generated empty image.
        promise: Forwarded unchanged to ``read_single_digit``.

    Returns:
        Either the generated empty image (for ``"x"``) or a randomly picked
        image of the digit, loaded with ``plt.imread``.

    Raises:
        Exception: If no image is available for the requested digit.
    """
    if digit == "x":
        # "x" stands for a blank image: nothing is read from disk.
        return process.gen_empty_im((32, 40, 3))

    list_paths = read_single_digit(
        root,
        get_dataset=get_dataset,
        num=int(digit),
        promise=promise,
    )
    candidates = list(list_paths())
    if not candidates:
        raise Exception(f"Dataset contains no images, dataset: {get_dataset()}")

    return plt.imread(random.choice(candidates))
def read_single_digit(
    root_path: pathlib.Path,
    get_dataset: t.Callable[[], str | pathlib.Path],
    num: int,
    promise: bool,
) -> t.Callable[[], t.Iterator[pathlib.Path]]:
    """Build a generator function over all image paths of one digit.

    Args:
        root_path: Root directory path.
        get_dataset: Callable returning the dataset name; it is invoked each
            time the digit path is computed.
        num: Digit whose images are wanted (0-9).
        promise: When true, the digit path is resolved through
            ``func.try_again`` with ``Path.exists`` as the validity check;
            otherwise it is computed once and not checked.

    Returns:
        A zero-argument generator function that yields every path produced
        by ``files.scan_pics`` for the digit's directory.

    Raises:
        AssertionError: If ``num`` is not in 0-9.
        Exception: Raised by the returned generator, on first iteration,
            when the scan produces no image at all.
    """
    # Raised explicitly (instead of using ``assert``) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if num not in range(0, 10):
        raise AssertionError("num must be 0~9")

    def might_fail_func() -> pathlib.Path:
        return path_fusion(root_path, str(get_dataset()), num)

    if promise:
        # NOTE(review): 15 is presumably the maximum number of attempts;
        # confirm against func.try_again.
        p = func.try_again(
            15,
            might_fail_func,
            is_validate_func=lambda p: p.exists(),
            fail_message=f"cannot num: {num}",
        )
    else:
        p = might_fail_func()

    logger.debug(f"path: {p}")

    def yield_pics() -> t.Iterator[pathlib.Path]:
        gen = files.scan_pics(path=p)
        # Pull the first item separately so an empty scan is reported as an
        # explicit error instead of a silently empty iteration.
        try:
            first = next(gen)
        except StopIteration:
            raise Exception(f"no images found in dataset {p}") from None
        yield first
        # Previously iteration stopped after the first image; hand out the
        # remaining ones as well so callers really see all images.
        yield from gen

    return yield_pics