ianro04 committed on
Commit
c12026a
·
verified ·
1 Parent(s): 4d2e877

Fix: Added .strip() to all ScandiProb input text + slight markdown changes again

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -63,7 +63,8 @@ def da_no_cross_skew(text):
63
  return [no_skew, da_skew]
64
 
65
  def ScandiProb(text):
66
- if not text.strip():
 
67
  return "None", {label: 0.0 for label in labels}
68
 
69
  inputs = tokenizer(text, return_tensors="pt")
@@ -106,8 +107,8 @@ with gr.Blocks() as demo:
106
  gr.Markdown("# ScandiProb: Hybrid Language ID Classifier")
107
  gr.Markdown("### By Ian Rodriguez")
108
  gr.Markdown("Enter text to output independent probabilities that it is written in **Norwegian**, **Swedish**, **Danish**, or **None of the Above / Non-Scandinavian**.")
109
- gr.Markdown("This model utilizes a fine-tuned [ScandiBERT](https://huggingface.co/vesteinn/ScandiBERT), trained on limited amounts of [OPUS-100](https://huggingface.co/datasets/Helsinki-NLP/opus-100/), combined with regex-enforced heuristics. Achieves ~84% macro-F1 score on the [SLIDE eval set](https://huggingface.co/datasets/ltg/slide) with a fraction of the training data used in the 2025 SLIDE paper.")
110
- gr.Markdown("[This project is licensed under AGPL-3.0.](https://www.gnu.org/licenses/agpl-3.0.en.html).")
111
  gr.Markdown("([GitHub](https://github.com/cloudeerie/scandiprob) | [Kaggle Notebooks](https://www.kaggle.com/code/cloudeerie/scandiprob/))")
112
 
113
  with gr.Row():
 
63
  return [no_skew, da_skew]
64
 
65
  def ScandiProb(text):
66
+ text = text.strip()
67
+ if not text:
68
  return "None", {label: 0.0 for label in labels}
69
 
70
  inputs = tokenizer(text, return_tensors="pt")
 
107
  gr.Markdown("# ScandiProb: Hybrid Language ID Classifier")
108
  gr.Markdown("### By Ian Rodriguez")
109
  gr.Markdown("Enter text to output independent probabilities that it is written in **Norwegian**, **Swedish**, **Danish**, or **None of the Above / Non-Scandinavian**.")
110
+ gr.Markdown("This model utilizes a fine-tuned [ScandiBERT](https://huggingface.co/vesteinn/ScandiBERT), trained on limited amounts of [OPUS-100](https://huggingface.co/datasets/Helsinki-NLP/opus-100/), combined with regex-enforced heuristics. Achieves ~84% macro-F1 score on the comprehensive [SLIDE eval set](https://huggingface.co/datasets/ltg/slide) with a fraction of the training data used in the 2025 SLIDE paper.")
111
+ gr.Markdown("[This project is licensed under AGPL-3.0.](https://www.gnu.org/licenses/agpl-3.0.en.html)")
112
  gr.Markdown("([GitHub](https://github.com/cloudeerie/scandiprob) | [Kaggle Notebooks](https://www.kaggle.com/code/cloudeerie/scandiprob/))")
113
 
114
  with gr.Row():