Spaces:
Running
Running
| <html> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width" /> | |
| <title>Parquet Visualization Studio</title> | |
| <link rel="stylesheet" href="style.css" /> | |
| <script src="https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@latest/dist/duckdb-mvp.wasm.js"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@latest/dist/duckdb-browser-mvp.worker.js"></script> | |
| <script type="module" src="https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@latest/dist/duckdb-browser-mvp.worker.js"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/vega@5"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <h1>📊 Parquet Visualization Studio</h1> | |
| <p class="subtitle">Visualize parquet files with interactive charts</p> | |
| <form id="queryForm"> | |
| <div class="form-group"> | |
| <label for="urlSelect">Select Example Dataset</label> | |
| <select id="urlSelect"> | |
| <option value="">-- Choose a dataset or enter custom URL below --</option> | |
| <option value="https://huggingface.co/datasets/PleIAs/SYNTH/resolve/refs%2Fconvert%2Fparquet/default/partial-train/0000.parquet">PleIAs/SYNTH</option> | |
| <option value="https://huggingface.co/datasets/facebook/omnilingual-asr-corpus/resolve/refs%2Fconvert%2Fparquet/gby_Latn/train/0000.parquet">facebook/omnilingual-asr-corpus</option> | |
| <option value="https://example.com/dataset3.parquet">Dataset 3</option> | |
| <option value="https://example.com/dataset4.parquet">Dataset 4</option> | |
| <option value="https://example.com/dataset5.parquet">Dataset 5</option> | |
| <option value="https://example.com/dataset6.parquet">Dataset 6</option> | |
| <option value="https://example.com/dataset7.parquet">Dataset 7</option> | |
| <option value="https://example.com/dataset8.parquet">Dataset 8</option> | |
| <option value="https://example.com/dataset9.parquet">Dataset 9</option> | |
| <option value="https://example.com/dataset10.parquet">Dataset 10</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label for="parquetUrl">Parquet File URL</label> | |
| <input | |
| type="text" | |
| id="parquetUrl" | |
| placeholder="https://example.com/data.parquet" | |
| required | |
| /> | |
| </div> | |
| <button type="submit" id="submitBtn">Load Dataset</button> | |
| </form> | |
| <div id="status" class="status"></div> | |
| <div id="visualizationSection" class="visualization-section" style="display: none;"> | |
| <h2>Create Visualization</h2> | |
| <div class="form-group"> | |
| <label for="hfToken">Hugging Face Token (required for LLM)</label> | |
| <input | |
| type="password" | |
| id="hfToken" | |
| placeholder="Enter your HF token with Inference Providers permission" | |
| /> | |
| <small>Get a token from <a href="https://huggingface.co/settings/tokens" target="_blank">HF Settings</a> with "Make calls to Inference Providers" permission</small> | |
| </div> | |
| <div class="form-group"> | |
| <label for="vizPrompt">Describe the visualization you want</label> | |
| <textarea | |
| id="vizPrompt" | |
| rows="3" | |
| placeholder="e.g., Show a scatter plot of price vs quantity, Create a bar chart showing count by category..." | |
| ></textarea> | |
| </div> | |
| <button type="button" id="generateVizBtn">Generate Visualization</button> | |
| <div id="vizContainer" class="viz-container"></div> | |
| </div> | |
| </div> | |
| <script type="module"> | |
| import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@latest/+esm'; | |
// ---- Shared module state ----

// DuckDB handles. Both remain null until initDuckDB() has assigned them.
let conn = null;
let db = null;

// Dataset state: the URL most recently requested by the UI, and the
// { name, type } entries that detectColumns() derives from it.
let columnInfo = [];
let currentDatasetUrl = null;
// In-flight (or completed) initialization. Sharing it means overlapping
// callers wait on the same attempt instead of each spawning a worker.
let duckdbInitPromise = null;

/**
 * Initialize DuckDB-Wasm and open a connection.
 *
 * On success the module-level `db` and `conn` are assigned. On failure both
 * are left untouched, so a later call can retry from scratch. Calls made
 * while an attempt is running, or after one has succeeded, resolve with
 * that same attempt.
 *
 * @returns {Promise<void>} Resolves once `db` and `conn` are ready.
 * @throws Whatever bundle selection, worker creation, instantiation or
 *   connecting throws.
 */
async function initDuckDB() {
  if (!duckdbInitPromise) {
    duckdbInitPromise = instantiateDuckDB().catch((error) => {
      // Forget the failed attempt so the next call starts a fresh one.
      duckdbInitPromise = null;
      throw error;
    });
  }
  return duckdbInitPromise;
}

/**
 * Perform one initialization attempt; publishes `db`/`conn` only on success.
 */
async function instantiateDuckDB() {
  const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());
  // The worker script lives on a CDN; wrapping it in a same-origin blob that
  // calls importScripts() lets the page construct a Worker from it.
  const workerUrl = URL.createObjectURL(
    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
  );
  let worker = null;
  try {
    worker = new Worker(workerUrl);
    const database = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
    await database.instantiate(bundle.mainModule, bundle.pthreadWorker);
    const connection = await database.connect();
    // Assign shared state last: a half-initialized `db` would make
    // loadDataset() skip initialization while `conn` is still null.
    db = database;
    conn = connection;
  } catch (error) {
    if (worker) {
      worker.terminate();
    }
    throw error;
  } finally {
    // The blob URL is only needed to start the worker; release it either way.
    URL.revokeObjectURL(workerUrl);
  }
}
/**
 * Show a message in the #status element.
 *
 * @param {string} message - Text to display (assigned through textContent).
 * @param {string} [type='info'] - Suffix of the `status-<type>` CSS class.
 */
function setStatus(message, type = 'info') {
  const banner = document.getElementById('status');
  Object.assign(banner, {
    textContent: message,
    className: `status status-${type}`,
  });
  banner.style.display = 'block';
}
/**
 * Normalize a DuckDB type name for prefix matching.
 *
 * @param {*} type - Type name such as 'DECIMAL(18,3)'.
 * @returns {string} Trimmed, upper-cased name, or '' for non-string input so
 *   the classifiers below answer false instead of throwing.
 */
function normalizeTypeName(type) {
  return typeof type === 'string' ? type.trim().toUpperCase() : '';
}

/**
 * Determine if a DuckDB type is a complex (nested) type.
 *
 * DuckDB writes list types as `T[]` and fixed-size arrays as `T[n]`, so a
 * trailing `]` is checked in addition to the keyword prefixes.
 *
 * @param {string} type - DuckDB type name.
 * @returns {boolean} True for struct, list, map, union and array types.
 */
function isComplexType(type) {
  const name = normalizeTypeName(type);
  if (name.endsWith(']')) {
    return true;
  }
  const complexPrefixes = ['STRUCT', 'LIST', 'MAP', 'UNION', 'ARRAY'];
  return complexPrefixes.some((prefix) => name.startsWith(prefix));
}

/**
 * Determine if a DuckDB type is numeric.
 *
 * @param {string} type - DuckDB type name.
 * @returns {boolean} True for signed/unsigned integer, floating-point and
 *   decimal types; false for complex types (e.g. `INTEGER[]`).
 */
function isNumericType(type) {
  // A list of numbers starts with a numeric prefix but is not a scalar.
  if (isComplexType(type)) {
    return false;
  }
  const numericPrefixes = [
    'TINYINT', 'SMALLINT', 'INTEGER', 'BIGINT', 'HUGEINT',
    'UTINYINT', 'USMALLINT', 'UINTEGER', 'UBIGINT', 'UHUGEINT',
    'FLOAT', 'DOUBLE', 'DECIMAL', 'NUMERIC', 'REAL',
  ];
  const name = normalizeTypeName(type);
  return numericPrefixes.some((prefix) => name.startsWith(prefix));
}

/**
 * Determine if a DuckDB type is text.
 *
 * @param {string} type - DuckDB type name.
 * @returns {boolean} True for string types; false for complex types
 *   (e.g. `VARCHAR[]`).
 */
function isTextType(type) {
  if (isComplexType(type)) {
    return false;
  }
  const textPrefixes = ['VARCHAR', 'CHAR', 'TEXT', 'STRING'];
  const name = normalizeTypeName(type);
  return textPrefixes.some((prefix) => name.startsWith(prefix));
}
/**
 * Point the DuckDB file name 'data.parquet' at the given URL.
 *
 * Initializes DuckDB first when `db` has not been set, removes any previous
 * registration under that name, then registers the URL over HTTP.
 *
 * @param {string} url - Location of the parquet file.
 * @returns {Promise<void>}
 */
async function loadDataset(url) {
  const registeredName = 'data.parquet';

  if (!db) {
    await initDuckDB();
  }

  try {
    await db.dropFile(registeredName);
  } catch {
    // Best effort: a failed drop (e.g. nothing registered yet) is ignored.
  }

  const directIO = false;
  await db.registerFileURL(
    registeredName,
    url,
    duckdb.DuckDBDataProtocol.HTTP,
    directIO
  );
}
// Tail of the chain of detection runs. The dropdown, blur and submit
// handlers can all request a detection while an earlier one is still
// awaiting; chaining keeps their dropFile/registerFileURL/DESCRIBE steps on
// the shared 'data.parquet' name from interleaving.
let detectColumnsQueue = Promise.resolve();

/**
 * Detect the columns and column types of the parquet file at `url`.
 *
 * Runs are serialized: a call waits for earlier calls to finish, so the
 * last request made is the one whose result ends up in `columnInfo`.
 * Failures are reported through setStatus() and are not rethrown.
 *
 * @param {string} url - Location of the parquet file.
 * @returns {Promise<void>} Resolves when this run has finished.
 */
async function detectColumns(url) {
  const run = detectColumnsQueue.then(() => runColumnDetection(url));
  // Keep the chain alive even if a run unexpectedly rejects.
  detectColumnsQueue = run.catch(() => {});
  return run;
}

/**
 * Single detection run: load the dataset, DESCRIBE it, update the UI.
 */
async function runColumnDetection(url) {
  try {
    setStatus('Detecting column types...', 'info');
    await loadDataset(url);

    const result = await conn.query("DESCRIBE 'data.parquet'");
    columnInfo = result.toArray().map((row) => ({
      name: row.column_name,
      type: row.column_type,
    }));

    setStatus(`Detected ${columnInfo.length} columns`, 'success');
    showVisualizationSection();
  } catch (error) {
    console.error('Error detecting columns:', error);
    setStatus(`Error detecting columns: ${error.message}`, 'error');
    columnInfo = [];
    // Hide the panel directly: leaving it visible would offer charts for a
    // dataset that failed to load, and showVisualizationSection() would
    // replace the error message above with its own.
    document.getElementById('visualizationSection').style.display = 'none';
  }
}
/**
 * Toggle the #visualizationSection panel according to `columnInfo`.
 *
 * The panel is shown when at least one column is known; otherwise it is
 * hidden and an error status is displayed.
 */
function showVisualizationSection() {
  const panel = document.getElementById('visualizationSection');
  const hasColumns = columnInfo.length > 0;

  panel.style.display = hasColumns ? 'block' : 'none';
  if (!hasColumns) {
    setStatus('No columns found in dataset', 'error');
  }
}
/**
 * Submit handler for the dataset form.
 *
 * Prevents the default submission, reads the trimmed URL from #parquetUrl
 * and, when it is non-empty, records it as the current dataset and runs
 * column detection. The submit button is disabled and relabelled for the
 * duration of the load.
 *
 * @param {Event} e - The submit event.
 * @returns {Promise<void>}
 */
async function handleSubmit(e) {
  e.preventDefault();

  const parquetUrl = document.getElementById('parquetUrl').value.trim();
  const button = document.getElementById('submitBtn');

  if (parquetUrl === '') {
    setStatus('Please provide a parquet URL.', 'error');
    return;
  }

  // Switch the button between its busy and idle appearance.
  const setBusy = (busy) => {
    button.disabled = busy;
    button.textContent = busy ? 'Loading...' : 'Load Dataset';
  };

  try {
    setBusy(true);
    currentDatasetUrl = parquetUrl;
    await detectColumns(parquetUrl);
  } catch (error) {
    console.error('Error:', error);
    setStatus(`Error: ${error.message}`, 'error');
  } finally {
    setBusy(false);
  }
}
// Example-dataset dropdown: choosing a non-empty option copies its URL into
// the text field, records it as the current dataset and detects its columns.
document.getElementById('urlSelect').addEventListener('change', async (event) => {
  const { value: chosenUrl } = event.target;
  if (!chosenUrl) {
    return;
  }
  document.getElementById('parquetUrl').value = chosenUrl;
  currentDatasetUrl = chosenUrl;
  await detectColumns(chosenUrl);
});
// Manual URL input: when the field loses focus with a non-empty URL that
// differs from the current dataset, record it and detect its columns.
document.getElementById('parquetUrl').addEventListener('blur', async (event) => {
  const typedUrl = event.target.value.trim();
  const isNewUrl = typedUrl !== '' && typedUrl !== currentDatasetUrl;
  if (!isNewUrl) {
    return;
  }
  currentDatasetUrl = typedUrl;
  await detectColumns(typedUrl);
});
/**
 * Build the system prompt that lists the dataset columns for the LLM.
 *
 * @param {Array<{name: string, type: string}>} columns - Detected columns.
 * @returns {string} Prompt text.
 */
function buildVizSystemPrompt(columns) {
  const columnDescriptions = columns.map((col) => `- ${col.name}: ${col.type}`).join('\n');
  return [
    'You are a data visualization assistant that generates Vega-Lite specifications.',
    'Available dataset columns:',
    columnDescriptions,
    'Instructions:',
    "1. Generate a valid Vega-Lite v5 specification based on the user's request",
    '2. Use ONLY columns that exist in the dataset above',
    '3. The data will be provided as an array of objects in the "data.values" field',
    '4. Output ONLY the JSON specification, no explanations or markdown',
    '5. Do not include the data itself, just reference fields by name',
    '6. Include appropriate width and height (e.g., 600x400)',
    '7. Make sure the spec is complete and valid',
    'Output only the JSON spec starting with { and ending with }.',
  ].join('\n');
}

/**
 * Extract and parse the JSON object contained in an LLM reply.
 *
 * Inline <think>...</think> blocks are removed first (reasoning models may
 * emit them in the content — depends on the provider), then everything from
 * the first '{' to the last '}' is parsed, which also discards markdown
 * fences or prose around the object.
 *
 * @param {string} rawText - Message content returned by the model.
 * @returns {object} The parsed specification.
 * @throws {Error} When no JSON object is present or it does not parse.
 */
function parseVegaSpec(rawText) {
  const withoutReasoning = rawText.replace(/<think>[\s\S]*?<\/think>/gi, '');
  const start = withoutReasoning.indexOf('{');
  const end = withoutReasoning.lastIndexOf('}');
  if (start === -1 || end < start) {
    throw new Error('no JSON object found in the response');
  }
  return JSON.parse(withoutReasoning.slice(start, end + 1));
}

/**
 * Convert query result rows into plain objects for Vega.
 *
 * Rows exposing toJSON() (Arrow row proxies) are materialized through it.
 * Top-level BigInt values become Numbers, which can lose precision above
 * Number.MAX_SAFE_INTEGER; nested values are passed through unchanged.
 *
 * @param {Array<object>} rows - Rows from `result.toArray()`.
 * @returns {Array<object>} Plain-object rows.
 */
function toPlainRows(rows) {
  return rows.map((row) => {
    const record = typeof row.toJSON === 'function' ? row.toJSON() : { ...row };
    for (const [key, value] of Object.entries(record)) {
      if (typeof value === 'bigint') {
        record[key] = Number(value);
      }
    }
    return record;
  });
}

/**
 * Ask the LLM for a Vega-Lite spec matching `prompt`, attach up to 1000 rows
 * of the loaded dataset, and render the chart into #vizContainer.
 *
 * Errors are logged and reported through setStatus(); nothing is rethrown.
 *
 * @param {string} prompt - The user's description of the desired chart.
 * @param {string} hfToken - Hugging Face token sent as a Bearer credential.
 * @returns {Promise<void>}
 */
async function generateVisualization(prompt, hfToken) {
  const vizContainer = document.getElementById('vizContainer');
  vizContainer.innerHTML = '';
  try {
    setStatus('Generating visualization with LLM...', 'info');

    const response = await fetch(
      "https://router.huggingface.co/v1/chat/completions",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${hfToken}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "deepseek-ai/DeepSeek-R1",
          messages: [
            { role: "system", content: buildVizSystemPrompt(columnInfo) },
            { role: "user", content: prompt },
          ],
          temperature: 0.7,
          max_tokens: 2000,
        }),
      }
    );
    if (!response.ok) {
      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
    }

    // Validate the shape before indexing so an unexpected payload produces a
    // readable message rather than a TypeError.
    const data = await response.json();
    const firstChoice = data && Array.isArray(data.choices) ? data.choices[0] : undefined;
    const content = firstChoice && firstChoice.message ? firstChoice.message.content : undefined;
    if (typeof content !== 'string' || content.trim() === '') {
      throw new Error('LLM response did not contain any message content');
    }

    let spec;
    try {
      spec = parseVegaSpec(content);
    } catch (e) {
      throw new Error(`Failed to parse LLM response as JSON: ${e.message}`);
    }

    setStatus('Fetching data for visualization...', 'info');
    const result = await conn.query(`SELECT * FROM 'data.parquet' LIMIT 1000`);

    // Any data section produced by the model is replaced with the real rows.
    spec.data = { values: toPlainRows(result.toArray()) };

    setStatus('Rendering visualization...', 'info');
    await vegaEmbed('#vizContainer', spec);
    setStatus('Visualization generated successfully!', 'success');
  } catch (error) {
    console.error('Error generating visualization:', error);
    setStatus(`Error: ${error.message}`, 'error');
  }
}
// "Generate Visualization" button: validate the prompt and token, then run
// the generation. The button is disabled while a request is in flight so a
// second click cannot start an overlapping LLM call and render.
document.getElementById('generateVizBtn').addEventListener('click', async (event) => {
  // Capture the button now; event.currentTarget is cleared after dispatch.
  const button = event.currentTarget;
  const prompt = document.getElementById('vizPrompt').value.trim();
  const hfToken = document.getElementById('hfToken').value.trim();

  if (!prompt) {
    setStatus('Please enter a visualization prompt', 'error');
    return;
  }
  if (!hfToken) {
    setStatus('Please enter your Hugging Face token', 'error');
    return;
  }

  button.disabled = true;
  try {
    await generateVisualization(prompt, hfToken);
  } finally {
    button.disabled = false;
  }
});
// Route submissions of the dataset form to handleSubmit.
const queryFormEl = document.getElementById('queryForm');
queryFormEl.addEventListener('submit', handleSubmit);

// Initial banner, shown as soon as the module script has run.
setStatus('Ready to query parquet files!', 'success');
| </script> | |
| </body> | |
| </html> | |