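// Web Worker for in-browser, interactive image segmentation with SlimSAM
// (a distilled Segment Anything model) via Transformers.js. The expensive
// image encoder runs once per image ('segment'); the lightweight mask
// decoder is re-run for every set of point prompts ('decode').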
import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';

// Since we will download the model from the Hugging Face Hub, we can skip the local model check.
env.allowLocalModels = false;

// We adopt the singleton pattern to enable lazy-loading of the model and processor.
export class SegmentAnythingSingleton {
    static model_id = 'Xenova/slimsam-77-uniform';
    static model;
    static processor;
    static quantized = true;

    static getInstance() {
        // from_pretrained() returns a promise; we cache it so the download
        // starts only once, on the first call.
        if (!this.model) {
            this.model = SamModel.from_pretrained(this.model_id, {
                quantized: this.quantized,
            });
        }
        if (!this.processor) {
            this.processor = AutoProcessor.from_pretrained(this.model_id);
        }
        return Promise.all([this.model, this.processor]);
    }
}

// State variables
let image_embeddings = null;
let image_inputs = null;
let ready = false;
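
// Message protocol handled below:
//   { type: 'reset' }                   -> clear the cached image state
//   { type: 'segment', data: <image> }  -> compute and cache image embeddings
//   { type: 'decode', data: [points] }  -> run the mask decoder on the cached embeddings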
self.onmessage = async (e) => {
    const [model, processor] = await SegmentAnythingSingleton.getInstance();
    if (!ready) {
        // Indicate that we are ready to accept requests
        ready = true;
        self.postMessage({
            type: 'ready',
        });
    }

    const { type, data } = e.data;
    if (type === 'reset') {
        image_inputs = null;
        image_embeddings = null;
    } else if (type === 'segment') {
        // Indicate that we are starting to segment the image
        self.postMessage({
            type: 'segment_result',
            data: 'start',
        });

        // Read the image and recompute the image embeddings
        const image = await RawImage.read(data);
        image_inputs = await processor(image);
        image_embeddings = await model.get_image_embeddings(image_inputs);

        // Indicate that we have computed the image embeddings, and that we are
        // ready to accept decoding requests
        self.postMessage({
            type: 'segment_result',
            data: 'done',
        });
    } else if (type === 'decode') {
        // Prepare inputs for decoding: scale the normalized [0, 1] click
        // coordinates to the model's reshaped input size
        // (height = reshaped[0], width = reshaped[1])
        const reshaped = image_inputs.reshaped_input_sizes[0];
        const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]]);
        const labels = data.map(x => BigInt(x.label));

        // SAM expects prompts of shape [batch, point_batch, num_points, 2]
        // for the coordinates and [batch, point_batch, num_points] for the labels
        const input_points = new Tensor(
            'float32',
            points.flat(Infinity),
            [1, 1, points.length, 2],
        );
        const input_labels = new Tensor(
            'int64',
            labels.flat(Infinity),
            [1, 1, labels.length],
        );

        // Generate the mask
        const outputs = await model({
            ...image_embeddings,
            input_points,
            input_labels,
        });

        // Post-process the mask (upscale it back to the original image size)
        const masks = await processor.post_process_masks(
            outputs.pred_masks,
            image_inputs.original_sizes,
            image_inputs.reshaped_input_sizes,
        );

        // Send the result back to the main thread
        self.postMessage({
            type: 'decode_result',
            data: {
                mask: RawImage.fromTensor(masks[0][0]),
                scores: outputs.iou_scores.data,
            },
        });
    } else {
        throw new Error(`Unknown message type: ${type}`);
    }
};
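
// ---------------------------------------------------------------------------
// Main-thread usage (a sketch, not part of this worker). It assumes this file
// is served as './worker.js'; the image URL and click coordinates are
// illustrative placeholders.
//
//   const worker = new Worker('./worker.js', { type: 'module' });
//
//   worker.onmessage = (e) => {
//       const { type, data } = e.data;
//       if (type === 'ready') {
//           // Model and processor are loaded; compute embeddings for an image.
//           worker.postMessage({ type: 'segment', data: 'https://example.com/image.jpg' });
//       } else if (type === 'segment_result' && data === 'done') {
//           // Embeddings are cached; request a mask for a single foreground click.
//           // Points are normalized to [0, 1]; label 1 = foreground, 0 = background.
//           worker.postMessage({ type: 'decode', data: [{ point: [0.5, 0.5], label: 1 }] });
//       } else if (type === 'decode_result') {
//           console.log(data.mask, data.scores);
//       }
//   };
//
//   // Any first message (e.g. a reset) triggers model loading and the 'ready' reply.
//   worker.postMessage({ type: 'reset' });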