Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import streamlit as st | |
| import numpy as np | |
| import torch | |
| from transformers import AlbertTokenizer | |
| import io | |
| import time | |
def load_model(model_name):
    """Load the tokenizer that corresponds to ``model_name``.

    Args:
        model_name: Name forwarded to ``AlbertTokenizer.from_pretrained``.
            Only names starting with ``'albert'`` are handled.

    Returns:
        The tokenizer object returned by ``AlbertTokenizer.from_pretrained``.

    Raises:
        ValueError: If ``model_name`` does not start with ``'albert'``.
    """
    if not model_name.startswith('albert'):
        # Without this guard the function reached ``return tokenizer`` with
        # the name unbound and failed with an opaque UnboundLocalError.
        raise ValueError(f'Unsupported model name: {model_name!r}')
    return AlbertTokenizer.from_pretrained(model_name)
def _decode_tokens(tokenizer, sentence):
    """Tokenize ``sentence`` and return one decoded string per token id.

    The first and last ids are dropped before decoding (presumably the
    special tokens the tokenizer adds around the input -- confirm against
    the tokenizer in use).
    """
    token_ids = tokenizer(sentence)['input_ids']
    return [tokenizer.decode([token_id]) for token_id in token_ids[1:-1]]


def _show_tokens(pieces):
    """Write each decoded token in its own column, then the token count."""
    if pieces:
        # st.columns rejects an empty spec, so the row is only built when
        # there is at least one token (e.g. not for an empty text input).
        # Column widths are proportional to the token length plus padding.
        widths = [len(piece) + 2 for piece in pieces]
        for column, piece in zip(st.columns(widths), pieces):
            with column:
                st.write(piece)
    st.write(f'{len(pieces)} tokens')


if __name__ == '__main__':
    # Config
    max_width = 1500
    padding_top = 0
    padding_right = 2
    padding_bottom = 0
    padding_left = 2

    # The CSS lines are kept flush-left, exactly as they appear in the
    # original text, so the emitted markup is unchanged.
    define_margins = f"""
<style>
.appview-container .main .block-container{{
max-width: {max_width}px;
padding-top: {padding_top}rem;
padding-right: {padding_right}rem;
padding-left: {padding_left}rem;
padding-bottom: {padding_bottom}rem;
}}
</style>
"""
    hide_table_row_index = """
<style>
tbody th {display:none}
.blank {display:none}
</style>
"""
    st.markdown(define_margins, unsafe_allow_html=True)
    st.markdown(hide_table_row_index, unsafe_allow_html=True)

    # Title
    st.header("Tokenizer Demo")

    tokenizer = load_model('albert-xxlarge-v2')

    # One input column per sentence; token counts are stored under
    # 'sent_0' and 'sent_1' (zero-based, from enumerate).
    sent_cols = st.columns(2)
    num_tokens = {}
    for sent_id, sent_col in enumerate(sent_cols):
        # NOTE(review): the original indentation was lost; rendering the
        # token row inside the sentence column is the reconstructed intent.
        with sent_col:
            sentence = st.text_input(f'Sentence {sent_id+1}')
            decoded_sent = _decode_tokens(tokenizer, sentence)
            num_tokens[f'sent_{sent_id}'] = len(decoded_sent)
            _show_tokens(decoded_sent)

    # The keys must match the zero-based ids written above; the previous
    # lookup of 'sent_1'/'sent_2' always failed with KeyError: 'sent_2'.
    if num_tokens['sent_0'] == num_tokens['sent_1']:
        st.subheader('Matched!')
    else:
        st.subheader('Not Matched...')