Spaces:
Sleeping
Sleeping
| import streamlit as st | |
def chunk_text(text, chunk_size, overlap):
    """Split *text* into fixed-size character chunks that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.
    The final chunk may be shorter than ``chunk_size``; when ``overlap`` is
    non-zero it can consist solely of characters already present at the end
    of the previous chunk.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings in order of appearance (empty for empty text).

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # A step of zero or less would never advance through the text (an
    # endless loop in the naive while-loop form); a negative overlap would
    # silently skip characters between chunks.
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
def _chunk_card_html(index, chunk):
    """Return single-line HTML markup for one chunk card.

    Args:
        index: 1-based position of the chunk, shown in the card heading.
        chunk: Raw chunk text as entered by the user.
    """
    import html  # local import keeps this block self-contained

    # The chunk is user input rendered with unsafe_allow_html=True, so it
    # must be escaped or any "<", "&" or tag in the text becomes live markup.
    # Newlines become <br> and the card is emitted on one line because a
    # blank line would otherwise terminate the Markdown HTML block and spill
    # the rest of the chunk outside the card.
    safe_text = html.escape(chunk).replace("\n", "<br>")
    return (
        '<div class="chunk-box">'
        f"<h4>Chunk #{index} (Length: {len(chunk)})</h4>"
        '<hr style="border:1px solid #2F4F4F">'
        f"<p>{safe_text}</p>"
        "</div>"
    )


def main():
    """Render the "Text Chunker" Streamlit page.

    The page shows a text area pre-filled with an example paragraph, sliders
    for chunk size (50-200, default 100, step 10) and overlap (0-50,
    default 20, step 5), and a button that splits the text with
    ``chunk_text`` and displays each chunk in a styled card.
    """
    # NOTE(review): the icon glyphs in the strings below look mis-encoded
    # (probably emoji originally) - confirm the intended characters.
    st.set_page_config(page_title="Text Chunker", page_icon="βοΈ", layout="centered")

    # Custom CSS for styling
    st.markdown(
        """
        <style>
        .header {
            color: #2F4F4F;
            border-bottom: 2px solid #2F4F4F;
            padding-bottom: 10px;
        }
        .chunk-box {
            padding: 20px;
            margin: 10px 0;
            border-radius: 10px;
            background-color: #F0F2F6;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
    st.markdown('<h1 class="header">βοΈ Text Chunker</h1>', unsafe_allow_html=True)

    # Example text (explicit "\n" joins avoid embedding source indentation)
    example_text = (
        "Natural language processing (NLP) is a subfield of linguistics, computer science,\n"
        "and artificial intelligence concerned with the interactions between computers and human language.\n"
        "It focuses on how to program computers to process and analyze large amounts of natural language data.\n"
        "The result is a computer capable of understanding natural language in a way that is both meaningful\n"
        "and useful to humans."
    )

    # Inputs
    input_text = st.text_area("Input Text", value=example_text, height=200)
    col1, col2 = st.columns(2)
    with col1:
        chunk_size = st.slider("Chunk Size (characters)", 50, 200, 100, 10)
    with col2:
        overlap = st.slider("Overlap (characters)", 0, 50, 20, 5)

    # The slider ranges meet at 50, so overlap can equal chunk_size; stop
    # here because chunking needs a positive step between chunk starts.
    if overlap >= chunk_size:
        st.error("Overlap must be smaller than chunk size!")
        return

    # Processing
    if st.button("Chunk It!", type="primary"):
        chunks = chunk_text(input_text, chunk_size, overlap)
        st.markdown(f"**π {len(chunks)} Chunks Created**")
        for i, chunk in enumerate(chunks, 1):
            with st.container():
                st.markdown(_chunk_card_html(i, chunk), unsafe_allow_html=True)
        st.success("β Chunking completed! Scroll to see all chunks.")
# Launch the app only when this file is executed as a script (for example
# via `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()