---
language:
- en
base_model:
- meta-llama/Llama-3.2-1B-Instruct
pipeline_tag: text-generation
---

```python
import sys

import onnxruntime_genai as og

model = og.Model('soap5_onnx')
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

# Search options - exact match to original
search_options = {
    'max_length': 4096,
    'temperature': 0.1,
    'top_p': 0.9,
    'do_sample': True,
    'batch_size': 1,
}

soap_note_prompt = """You are an expert medical professor assisting in the creation of medically accurate SOAP summaries. Please ensure the response follows the structured format: S:, O:, A:, P: without using markdown or special formatting. Create a Medical SOAP note summary from the dialogue, following these guidelines:\n S (Subjective): Summarize the patient's reported symptoms, including chief complaint and relevant history. Rely on the patient's statements as the primary source and ensure standardized terminology.\n O (Objective): Highlight critical findings such as vital signs, lab results, and imaging, emphasizing important details like the side of the body affected and specific dosages. Include normal ranges where relevant.\n A (Assessment): Offer a concise assessment combining subjective and objective data. State the primary diagnosis and any differential diagnoses, noting potential complications and the prognostic outlook.\n P (Plan): Outline the management plan, covering medication, diet, consultations, and education. Ensure to mention necessary referrals to other specialties and address compliance challenges.\n Considerations: Compile the report based solely on the transcript provided. Use concise medical jargon and abbreviations for effective doctor communication.\n Please format the summary in a clean, simple list format without using markdown or bullet points. Use 'S:', 'O:', 'A:', 'P:' directly followed by the text. Avoid any styling or special characters. 
TRANSCRIPT: \n"""

# BUG FIX: the original marker list ended with "" and `"" in decoded` is
# always True, so `any(...)` fired on the very first token and generation
# stopped immediately. Only real end-of-text markers belong here.
END_MARKERS = ("<|eot_id|>", "<|end_of_text|>")


def stream_soap_note(prompt, skip_echo_text=None, max_new_tokens=2000):
    """Stream tokens for *prompt*, printing them as they arrive.

    Args:
        prompt: Fully formatted prompt string (already ends with "S: ",
            so the caller's printed "S: " prefix lines up with the output).
        skip_echo_text: If given, the first ~50 tokens are suppressed while
            they still echo the start of this input text (Method 1 heuristic).
        max_new_tokens: Safety cap on generated tokens.

    Returns:
        The generated text accumulated after any echo-skipping.
    """
    input_tokens = tokenizer.encode(prompt)
    print(f"Tokens in prompt: {len(input_tokens)}")

    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    generator = og.Generator(model, params)
    generator.append_tokens(input_tokens)

    print("S: ", end='', flush=True)  # "S: " is already in the prompt

    generated_text = ""
    token_count = 0
    try:
        while not generator.is_done() and token_count < max_new_tokens:
            generator.generate_next_token()
            new_token = generator.get_next_tokens()[0]
            decoded = tokenizer_stream.decode(new_token)

            # Skip if we're still in the input echo phase
            if (skip_echo_text is not None and token_count < 50
                    and skip_echo_text[:20] in generated_text + decoded):
                token_count += 1
                continue

            print(decoded, end='', flush=True)
            generated_text += decoded
            token_count += 1

            # Stop if we see end markers
            if any(marker in decoded for marker in END_MARKERS):
                break
    except KeyboardInterrupt:
        print("\nInterrupted")
    print()

    del generator  # Clean up
    return generated_text


text = input("Input: ")
if not text:
    print("Error, input cannot be empty")
    sys.exit(1)

# Method 1: Force generation by adding a SOAP starter after the prompt,
# using the most complete Llama chat format.
full_prompt = soap_note_prompt + text
chat_template = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
    "{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\nS: "
)
print("\nGenerating SOAP note...")
generated_text = stream_soap_note(
    chat_template.format(prompt=full_prompt), skip_echo_text=text
)

# If that didn't work (too little output, or it just echoed the input),
# try Method 2: a simpler plain-text prompt structure.
if len(generated_text.strip()) < 50 or text[:50] in generated_text:
    print("\n\nMethod 1 didn't work well. Trying alternative method...")
    simple_prompt = f"{soap_note_prompt}{text}\n\nSOAP Note:\nS: "
    print("\nGenerating with simplified format...")
    generated_text = stream_soap_note(simple_prompt)

print("\n--- Generation Complete ---")
```