Spaces:
Running
Running
Fix: Add .strip() to all ScandiProb input text; further minor Markdown changes
Browse files
app.py
CHANGED
|
@@ -63,7 +63,8 @@ def da_no_cross_skew(text):
|
|
| 63 |
return [no_skew, da_skew]
|
| 64 |
|
| 65 |
def ScandiProb(text):
|
| 66 |
-
|
|
|
|
| 67 |
return "None", {label: 0.0 for label in labels}
|
| 68 |
|
| 69 |
inputs = tokenizer(text, return_tensors="pt")
|
|
@@ -106,8 +107,8 @@ with gr.Blocks() as demo:
|
|
| 106 |
gr.Markdown("# ScandiProb: Hybrid Language ID Classifier")
|
| 107 |
gr.Markdown("### By Ian Rodriguez")
|
| 108 |
gr.Markdown("Enter text to output independent probabilities that it is written in **Norwegian**, **Swedish**, **Danish**, or **None of the Above / Non-Scandinavian**.")
|
| 109 |
-
gr.Markdown("This model utilizes a fine-tuned [ScandiBERT](https://huggingface.co/vesteinn/ScandiBERT), trained on limited amounts of [OPUS-100](https://huggingface.co/datasets/Helsinki-NLP/opus-100/), combined with regex-enforced heuristics. Achieves ~84% macro-F1 score on the [SLIDE eval set](https://huggingface.co/datasets/ltg/slide) with a fraction of the training data used in the 2025 SLIDE paper.")
|
| 110 |
-
gr.Markdown("[This project is licensed under AGPL-3.0.](https://www.gnu.org/licenses/agpl-3.0.en.html)
|
| 111 |
gr.Markdown("([GitHub](https://github.com/cloudeerie/scandiprob) | [Kaggle Notebooks](https://www.kaggle.com/code/cloudeerie/scandiprob/))")
|
| 112 |
|
| 113 |
with gr.Row():
|
|
|
|
| 63 |
return [no_skew, da_skew]
|
| 64 |
|
| 65 |
def ScandiProb(text):
|
| 66 |
+
text = text.strip()
|
| 67 |
+
if not text:
|
| 68 |
return "None", {label: 0.0 for label in labels}
|
| 69 |
|
| 70 |
inputs = tokenizer(text, return_tensors="pt")
|
|
|
|
| 107 |
gr.Markdown("# ScandiProb: Hybrid Language ID Classifier")
|
| 108 |
gr.Markdown("### By Ian Rodriguez")
|
| 109 |
gr.Markdown("Enter text to output independent probabilities that it is written in **Norwegian**, **Swedish**, **Danish**, or **None of the Above / Non-Scandinavian**.")
|
| 110 |
+
gr.Markdown("This model utilizes a fine-tuned [ScandiBERT](https://huggingface.co/vesteinn/ScandiBERT), trained on limited amounts of [OPUS-100](https://huggingface.co/datasets/Helsinki-NLP/opus-100/), combined with regex-enforced heuristics. Achieves ~84% macro-F1 score on the comprehensive [SLIDE eval set](https://huggingface.co/datasets/ltg/slide) with a fraction of the training data used in the 2025 SLIDE paper.")
|
| 111 |
+
gr.Markdown("[This project is licensed under AGPL-3.0.](https://www.gnu.org/licenses/agpl-3.0.en.html)")
|
| 112 |
gr.Markdown("([GitHub](https://github.com/cloudeerie/scandiprob) | [Kaggle Notebooks](https://www.kaggle.com/code/cloudeerie/scandiprob/))")
|
| 113 |
|
| 114 |
with gr.Row():
|