From 4f39d0f1446f78030fb41326560abc46eaf0eb03 Mon Sep 17 00:00:00 2001
From: Raboneko <119771935+raboneko@users.noreply.github.com>
Date: Mon, 12 Jan 2026 16:44:04 -0800
Subject: [PATCH] add: tokenizers (#9083) (#9143)

(cherry picked from commit 6582842fd946ef5ebfbb3e32391e6581be6148e1)

Signed-off-by: Owen-sz
Co-authored-by: Owen Zimmerman <123591347+Owen-sz@users.noreply.github.com>
---
Review note: Source1/Source2 in tokenizers.spec now use raw file URLs and the
spec gained explanatory comments. Hunk and diffstat counts are updated; the
blob id on the spec's index line has not been recomputed.

 anda/langs/python/tokenizers/anda.hcl        |  5 ++
 anda/langs/python/tokenizers/tokenizers.spec | 59 ++++++++++++++++++++
 anda/langs/python/tokenizers/update.rhai     |  1 +
 3 files changed, 65 insertions(+)
 create mode 100644 anda/langs/python/tokenizers/anda.hcl
 create mode 100644 anda/langs/python/tokenizers/tokenizers.spec
 create mode 100644 anda/langs/python/tokenizers/update.rhai

diff --git a/anda/langs/python/tokenizers/anda.hcl b/anda/langs/python/tokenizers/anda.hcl
new file mode 100644
index 0000000000..f99a421b5a
--- /dev/null
+++ b/anda/langs/python/tokenizers/anda.hcl
@@ -0,0 +1,5 @@
+project pkg {
+    rpm {
+        spec = "tokenizers.spec"
+    }
+}
diff --git a/anda/langs/python/tokenizers/tokenizers.spec b/anda/langs/python/tokenizers/tokenizers.spec
new file mode 100644
index 0000000000..63e03995c8
--- /dev/null
+++ b/anda/langs/python/tokenizers/tokenizers.spec
@@ -0,0 +1,59 @@
+# RPM spec for the tokenizers PyPI package (upstream: huggingface/tokenizers).
+%global pypi_name tokenizers
+%global _desc Fast State-of-the-Art Tokenizers optimized for Research and Production.
+
+Name:           python-%{pypi_name}
+Version:        0.22.2
+Release:        1%?dist
+Summary:        Fast State-of-the-Art Tokenizers optimized for Research and Production
+License:        Apache-2.0
+URL:            https://github.com/huggingface/tokenizers
+Source0:        %{pypi_source}
+# LICENSE and README.md are downloaded separately and copied in during prep.
+# Use raw.githubusercontent.com: github.com/.../blob/... URLs return an HTML
+# page, not the file, so the packaged LICENSE and README.md would be HTML.
+# NOTE(review): these follow the main branch; consider pinning to a release tag.
+Source1:        https://raw.githubusercontent.com/huggingface/tokenizers/main/LICENSE
+Source2:        https://raw.githubusercontent.com/huggingface/tokenizers/main/README.md
+
+BuildRequires:  python3-devel
+BuildRequires:  python3-wheel
+BuildRequires:  python3-setuptools
+BuildRequires:  python3-pip
+BuildRequires:  maturin
+BuildRequires:  gcc-c++
+
+Packager:       Owen Zimmerman
+
+%description
+%_desc
+
+%package -n python3-%{pypi_name}
+Summary:        %{summary}
+%{?python_provide:%python_provide python3-%{pypi_name}}
+
+%description -n python3-%{pypi_name}
+%_desc
+
+%prep
+%autosetup -n tokenizers-%{version}
+# Place the extra sources in the build tree so the doc and license entries
+# in the files section can pick them up by relative path.
+cp %{SOURCE1} .
+cp %{SOURCE2} .
+
+%build
+%pyproject_wheel
+
+%install
+%pyproject_install
+# Record the files installed for the tokenizers module for the files list.
+%pyproject_save_files tokenizers
+
+%files -n python3-%{pypi_name} -f %{pyproject_files}
+%doc README.md
+%license LICENSE
+
+%changelog
+* Sat Jan 10 2026 Owen Zimmerman
+- Initial commit
diff --git a/anda/langs/python/tokenizers/update.rhai b/anda/langs/python/tokenizers/update.rhai
new file mode 100644
index 0000000000..a1368751fc
--- /dev/null
+++ b/anda/langs/python/tokenizers/update.rhai
@@ -0,0 +1 @@
+rpm.version(pypi("tokenizers"));