jina-reranker-m0 / custom_transformer.py
numb3r3's picture
Integrate with Sentence Transformers v5.4 (#24)
94bfe0a
"""Custom Transformer module for jina-reranker-m0 that fixes image ordering for image-image pairs.
The Qwen2VL processor extracts images from messages in iteration order. ST creates messages
as [query_msg, doc_msg], but the chat template renders doc-first. For single-image pairs this
is fine, but for image-image pairs the two images get swapped. This module swaps the pair
elements so the processor extracts images in doc-first order, matching the template rendering.
Since both elements render as identical <|image_pad|> tokens, the role swap is invisible.
"""
from __future__ import annotations
from typing import Any
from PIL import Image
from sentence_transformers.base.modality import is_image_url_or_path
from sentence_transformers.base.modules.transformer import Transformer
def _is_image(item: Any) -> bool:
    """Tell whether *item* should be treated as an image input.

    An item counts as an image when it is a ``PIL.Image.Image`` instance, or
    when it is a string that ``is_image_url_or_path`` accepts. Anything else
    (including strings rejected by that helper) is not an image.
    """
    # Already-loaded PIL images need no further inspection.
    if isinstance(item, Image.Image):
        return True
    # Strings are delegated to the Sentence Transformers helper.
    if isinstance(item, str):
        return is_image_url_or_path(item)
    return False
class JinaRerankerTransformer(Transformer):
    """Transformer variant that reorders image-image pairs before preprocessing.

    The only override is :meth:`preprocess`; see the module docstring for why
    the two images of an image-image pair have to be exchanged.
    """

    def preprocess(
        self,
        inputs: list,
        prompt: str | None = None,
        **kwargs,
    ) -> dict[str, Any]:
        """Exchange the elements of image-image pairs, then defer to the parent.

        Args:
            inputs: Items to preprocess. Any item that is a two-element list or
                tuple whose elements both satisfy ``_is_image`` is replaced by
                a tuple with the two elements in reverse order; every other
                item is forwarded untouched.
            prompt: Passed through to the parent ``preprocess`` unchanged.
            **kwargs: Passed through to the parent ``preprocess`` unchanged.

        Returns:
            Whatever the parent ``preprocess`` returns for the reordered inputs.
        """

        def _both_images(candidate: Any) -> bool:
            # Only exact 2-element list/tuple pairs made of two images qualify.
            return (
                isinstance(candidate, (list, tuple))
                and len(candidate) == 2
                and _is_image(candidate[0])
                and _is_image(candidate[1])
            )

        # Put the document image first so the processor pulls images out in the
        # same order the chat template renders them (doc-first).
        reordered = [
            (entry[1], entry[0]) if _both_images(entry) else entry
            for entry in inputs
        ]
        return super().preprocess(reordered, prompt=prompt, **kwargs)