Add a fix that skips emitting empty historical <think> blocks (the block is now rendered only when reasoning_content is non-empty), preventing cache invalidation in long-running agentic workloads.
Browse files
https://huggingface.co/Qwen/Qwen3.5-122B-A10B/discussions/22
https://www.reddit.com/r/LocalLLaMA/comments/1sg076h/i_tracked_a_major_cache_reuse_issue_down_to_qwen/
- chat_template.jinja +1 -1
chat_template.jinja
CHANGED
|
@@ -122,7 +122,7 @@
|
|
| 122 |
{%- endif %}
|
| 123 |
{%- endif %}
|
| 124 |
{%- set reasoning_content = reasoning_content|trim %}
|
| 125 |
-
{%- if loop.index0 > ns.last_query_index %}
|
| 126 |
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
|
| 127 |
{%- else %}
|
| 128 |
{{- '<|im_start|>' + message.role + '\n' + content }}
|
|
|
|
| 122 |
{%- endif %}
|
| 123 |
{%- endif %}
|
| 124 |
{%- set reasoning_content = reasoning_content|trim %}
|
| 125 |
+
{%- if loop.index0 > ns.last_query_index and reasoning_content %}
|
| 126 |
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
|
| 127 |
{%- else %}
|
| 128 |
{{- '<|im_start|>' + message.role + '\n' + content }}
|