Integrate with Sentence Transformers v5.4 (#9)

- Integrate with Sentence Transformers v5.4.0 (062adb650ef980633805fa8f6fbc9481e6559fef)
- Add missing </b> (6cb650209ce3f8a3d65162af17b3f1084e15d18e)
- Rename CausalScoreHead to LogitScore (462e1b8c1348562525b2096966ee58151e5b352a)
- Move 'message_format' into 'modality_config' under 'message' -> 'format' (2f26065a0f46dd5ea2809cb2f93cedb359d77093)
- Add missing README changes (6f9c43608d5a9063a85df115b3e40398ce5f6a5e)
- Merge branch 'main' of https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v2 (3472e9d1489bb31112962bf8faa59faa855081d6)

Files changed (7) hide show

1_LogitScore/config.json +4 -0
README.md +37 -1
chat_template.jinja +9 -0
config_sentence_transformers.json +10 -0
modules.json +14 -0
sentence_bert_config.json +15 -0
tokenizer_config.json +0 -1

1_LogitScore/config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "true_token_id": 16,
+  "false_token_id": 15
+}

README.md CHANGED Viewed

@@ -121,6 +121,8 @@ language_bcp47:
   - zh-Hant
 license: apache-2.0
 pipeline_tag: text-ranking
 ---
 <br><br>
@@ -134,7 +136,7 @@ pipeline_tag: text-ranking
 </p>
 <p align="center">
-<sup> 🍞 Looking for a simple end-to-end retrieval solution? Meet Omni, our multimodal and multilingual model. <a href="https://mixedbread.com"><b>Get in touch for access.</a> </sup>
 </p>
 # 🍞 mxbai-rerank-base-v2 (a.k.a ProRank-0.5B)
@@ -160,6 +162,40 @@ We have two models:
 ## ⚙️ Usage
 1. Install mxbai-rerank
 ```bash

   - zh-Hant
 license: apache-2.0
 pipeline_tag: text-ranking
+tags:
+  - sentence-transformers
 ---
 <br><br>
 </p>
 <p align="center">
+<sup> 🍞 Looking for a simple end-to-end retrieval solution? Meet Omni, our multimodal and multilingual model. <a href="https://mixedbread.com"><b>Get in touch for access.</b></a> </sup>
 </p>
 # 🍞 mxbai-rerank-base-v2 (a.k.a ProRank-0.5B)
 ## ⚙️ Usage
+### Using Sentence Transformers
+Install Sentence Transformers (v5.4.0 or later):
+```bash
+pip install sentence_transformers
+```
+```python
+from sentence_transformers import CrossEncoder
+model = CrossEncoder("mixedbread-ai/mxbai-rerank-base-v2")
+query = "Who wrote 'To Kill a Mockingbird'?"
+documents = [
+    "'To Kill a Mockingbird' is a novel by Harper Lee published in 1960. It was immediately successful, winning the Pulitzer Prize, and has become a classic of modern American literature.",
+    "The novel 'Moby-Dick' was written by Herman Melville and first published in 1851. It is considered a masterpiece of American literature and deals with complex themes of obsession, revenge, and the conflict between good and evil.",
+    "Harper Lee, an American novelist widely known for her novel 'To Kill a Mockingbird', was born in 1926 in Monroeville, Alabama. She received the Pulitzer Prize for Fiction in 1961.",
+    "Jane Austen was an English novelist known primarily for her six major novels, which interpret, critique and comment upon the British landed gentry at the end of the 18th century.",
+    "The 'Harry Potter' series, which consists of seven fantasy novels written by British author J.K. Rowling, is among the most popular and critically acclaimed books of the modern era.",
+    "'The Great Gatsby', a novel written by American author F. Scott Fitzgerald, was published in 1925. The story is set in the Jazz Age and follows the life of millionaire Jay Gatsby and his pursuit of Daisy Buchanan.",
+]
+pairs = [(query, doc) for doc in documents]
+scores = model.predict(pairs)
+print(scores)
+# [9.750735   1.3697281  8.080784   2.6611633  0.8729458  0.39884853]
+rankings = model.rank(query, documents)
+print(rankings)
+# [{'corpus_id': 0, 'score': np.float32(9.750735)}, {'corpus_id': 2, 'score': np.float32(8.080784)}, {'corpus_id': 3, 'score': np.float32(2.6611633)}, {'corpus_id': 1, 'score': np.float32(1.3697281)}, {'corpus_id': 4, 'score': np.float32(0.8729458)}, {'corpus_id': 5, 'score': np.float32(0.39884853)}]
+```
+### Using mxbai-rerank
 1. Install mxbai-rerank
 ```bash

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,9 @@

+<|im_start|>system
+You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
+<|im_start|>user
+query: {{ (messages | selectattr("role", "eq", "query") | first).content }}
+document: {{ (messages | selectattr("role", "eq", "document") | first).content }}
+You are a search relevance expert who evaluates how well documents match search queries. For each query-document pair, carefully analyze the semantic relationship between them, then provide your binary relevance judgment (0 for not relevant, 1 for relevant).
+Relevance:<|im_end|>
+<|im_start|>assistant

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "model_type": "CrossEncoder",
+  "__version__": {
+    "sentence_transformers": "5.4.0",
+    "transformers": "4.52.0",
+    "pytorch": "2.7.0+cu126"
+  },
+  "prompts": {},
+  "default_prompt_name": null
+}

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.base.modules.transformer.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_LogitScore",
+    "type": "sentence_transformers.cross_encoder.modules.logit_score.LogitScore"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "transformer_task": "text-generation",
+  "modality_config": {
+    "text": {
+      "method": "forward",
+      "method_output_name": "logits"
+    },
+    "message": {
+      "method": "forward",
+      "method_output_name": "logits",
+      "format": "flat"
+    }
+  },
+  "module_output_name": "causal_logits"
+}

tokenizer_config.json CHANGED Viewed

@@ -195,7 +195,6 @@
     "<|video_pad|>"
   ],
   "bos_token": null,
-  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",

     "<|video_pad|>"
   ],
   "bos_token": null,
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",