How to use from the
Use from the
llama-cpp-python library
# !pip install llama-cpp-python

from llama_cpp import Llama

llm = Llama.from_pretrained(
	repo_id="Lauarvik/NVIDIA-Nemotron-Nano-9B-v2-GGUF",
	filename="",
)
llm.create_chat_completion(
	messages = "No input example has been defined for this model task."
)

Quantized version of nvidia/NVIDIA-Nemotron-Nano-9B-v2

ะŸั€ะธ ะบะฒะฐะฝั‚ะธะทะฐั†ะธะธ ะธัะฟะพะปัŒะทะพะฒะฐะปะฐััŒ imatrix, ะบะพั€ะฟัƒั ั‚ะตะบัั‚ะพะฒ ะดะปั ะฝะตั‘ ะฑั‹ะป ัะพะทะดะฐะฝ ัะปะตะดัƒัŽั‰ะธะผ ะพะฑั€ะฐะทะพะผ:

import json
import re
import hashlib
import time
from datasets import load_dataset

OUT_PATH = "calib_nemotron.jsonl"

TARGET_SAMPLES = 7000
CHUNK_SIZE = 900
MIN_LEN = 300
MAX_LEN = 3000

out = open(OUT_PATH, "w", encoding="utf-8")

seen = set()
written = 0


# -----------------------
# SAFE LOAD
# -----------------------
def safe_load(*args, **kwargs):
    for i in range(5):
        try:
            return load_dataset(*args, **kwargs)
        except Exception as e:
            print("retry", i, e)
            time.sleep(2)
    raise RuntimeError("failed to load dataset")


# -----------------------
# CLEAN
# -----------------------
def clean_text(txt: str) -> str:
    if not txt:
        return ""

    txt = re.sub(r"<[^>]+>", " ", txt)
    txt = re.sub(r"\s+", " ", txt).strip()

    if "\x00" in txt:
        return ""

    return txt


# -----------------------
# DEDUP
# -----------------------
def is_duplicate(txt: str) -> bool:
    h = hashlib.blake2b(txt.encode("utf-8"), digest_size=8).hexdigest()
    if h in seen:
        return True
    seen.add(h)
    return False


# -----------------------
# CHUNK
# -----------------------
def split_chunks(txt: str):
    for i in range(0, len(txt), CHUNK_SIZE):
        chunk = txt[i:i + CHUNK_SIZE]
        if len(chunk) >= MIN_LEN:
            yield chunk


# -----------------------
# WRITE
# -----------------------
def process_text(txt: str):
    global written

    txt = clean_text(txt)
    if not txt or len(txt) < MIN_LEN:
        return

    chunks = split_chunks(txt) if len(txt) > MAX_LEN else [txt]

    for chunk in chunks:
        if written >= TARGET_SAMPLES:
            return

        if is_duplicate(chunk):
            continue

        out.write(json.dumps({"text": chunk}, ensure_ascii=False) + "\n")
        written += 1


# -----------------------
# CHAT
# -----------------------
def handle_chat(ds, ratio):
    global written
    target = int(TARGET_SAMPLES * ratio)
    start = written

    for x in ds:
        if written - start >= target:
            break

        conv = x.get("conversations")
        if not conv:
            continue

        txt = "\n".join(
            f"{m.get('from','')}: {m.get('value','')}"
            for m in conv if m.get("value")
        )

        process_text(txt)


# -----------------------
# TEXT
# -----------------------
def handle_text(ds, field, ratio):
    global written
    target = int(TARGET_SAMPLES * ratio)
    start = written

    for x in ds:
        if written - start >= target:
            break

        process_text(x.get(field))


# -----------------------
# CODE
# -----------------------
def handle_code(ds, lang, ratio):
    global written
    target = int(TARGET_SAMPLES * ratio)
    start = written

    for x in ds:
        if written - start >= target:
            break

        if x.get("lang") == lang:
            process_text(x.get("content"))


# =======================
# DATASETS (ONLY SAFE ONES)
# =======================

print("chat...")
ds = safe_load("teknium/OpenHermes-2.5", split="train", streaming=True)
handle_chat(ds, 0.35)

print("en text...")
ds = safe_load("wikitext", "wikitext-103-raw-v1", split="train", streaming=True)
handle_text(ds, "text", 0.25)

print("ru fallback (wiki dump alternative)...")

# ะฑะตะทะพะฟะฐัะฝะฐั ะทะฐะผะตะฝะฐ RU:
ds = safe_load("wikimedia/wikipedia", "20231101.ru", split="train", streaming=True)
handle_text(ds, "text", 0.2)

print("rust...")
ds = safe_load("bigcode/the-stack-smol", split="train", streaming=True)
handle_code(ds, "Rust", 0.1)

print("python...")
ds = safe_load("bigcode/the-stack-smol", split="train", streaming=True)
handle_code(ds, "Python", 0.1)

out.close()

print("written:", written)
Downloads last month
649
GGUF
Model size
9B params
Architecture
nemotron_h
Hardware compatibility
Log In to add your hardware

2-bit

3-bit

4-bit

6-bit

8-bit

16-bit

Inference Providers NEW
This model isn't deployed by any Inference Provider. ๐Ÿ™‹ Ask for provider support

Model tree for Lauarvik/NVIDIA-Nemotron-Nano-9B-v2-GGUF