Create README.md
Browse files
README.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
base_model:
|
| 5 |
+
- meta-llama/Llama-3.2-1B-Instruct
|
| 6 |
+
pipeline_tag: text-generation
|
| 7 |
+
---
|
```python
"""Generate a medical SOAP note from a dialogue transcript using an ONNX
Llama-3.2-1B-Instruct model served through onnxruntime-genai.

Strategy: first prompt the model with the full Llama chat template and a
forced "S: " starter. If the model mostly echoes the input instead of
summarizing, fall back once to a simpler plain-text prompt.
"""
import sys

import onnxruntime_genai as og

# Model/tokenizer are loaded at import time, matching the README snippet's
# run-as-script usage. 'soap5_onnx' is the local ONNX model directory.
model = og.Model('soap5_onnx')
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

# Search options - exact match to original
search_options = {
    'max_length': 4096,
    'temperature': 0.1,
    'top_p': 0.9,
    'do_sample': True,
    'batch_size': 1
}

# Llama-family end-of-generation markers; generation stops when one appears.
_END_MARKERS = ("<|eot_id|>", "<|end_of_text|>", "</s>")

soap_note_prompt = """You are an expert medical professor assisting in the creation of medically accurate SOAP summaries.
Please ensure the response follows the structured format: S:, O:, A:, P: without using markdown or special formatting.
Create a Medical SOAP note summary from the dialogue, following these guidelines:\n
S (Subjective): Summarize the patient's reported symptoms, including chief complaint and relevant history.
Rely on the patient's statements as the primary source and ensure standardized terminology.\n
O (Objective): Highlight critical findings such as vital signs, lab results, and imaging, emphasizing important details like the side of the body affected and specific dosages.
Include normal ranges where relevant.\n
A (Assessment): Offer a concise assessment combining subjective and objective data. State the primary diagnosis and any differential diagnoses, noting potential complications and the prognostic outlook.\n
P (Plan): Outline the management plan, covering medication, diet, consultations, and education. Ensure to mention necessary referrals to other specialties and address compliance challenges.\n
Considerations: Compile the report based solely on the transcript provided. Use concise medical jargon and abbreviations for effective doctor communication.\n
Please format the summary in a clean, simple list format without using markdown or bullet points. Use 'S:', 'O:', 'A:', 'P:' directly followed by the text. Avoid any styling or special characters.
TRANSCRIPT: \n"""


def _stream_generate(prompt_tokens, echo_guard=None, max_tokens=2000):
    """Run one streamed generation pass, printing tokens as they arrive.

    Args:
        prompt_tokens: encoded prompt to feed the model.
        echo_guard: optional snippet of the user's input. While fewer than
            50 tokens have been produced, a token is silently skipped if the
            running output still contains this snippet — suppresses the
            model echoing the transcript back.
        max_tokens: hard cap on generated tokens for safety.

    Returns:
        The accumulated generated text (echo-skipped tokens excluded).
    """
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    generator = og.Generator(model, params)
    generator.append_tokens(prompt_tokens)

    generated_text = ""
    token_count = 0
    try:
        while not generator.is_done() and token_count < max_tokens:
            generator.generate_next_token()
            decoded = tokenizer_stream.decode(generator.get_next_tokens()[0])

            # Skip if we're still in the input echo phase
            if (echo_guard is not None and token_count < 50
                    and echo_guard in generated_text + decoded):
                token_count += 1
                continue

            print(decoded, end='', flush=True)
            generated_text += decoded
            token_count += 1

            # Stop if we see end markers
            if any(marker in decoded for marker in _END_MARKERS):
                break
    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        # Release the ORT generator promptly, even on interrupt (the
        # original only cleaned up on the happy path).
        del generator

    print()
    return generated_text


def main():
    """Prompt for a transcript and stream a SOAP note to stdout."""
    text = input("Input: ")
    if not text:
        print("Error, input cannot be empty")
        # exit() is the interactive-shell helper and exits 0; signal failure.
        sys.exit(1)

    # Method 1: Force generation by adding a SOAP starter after the prompt
    full_prompt = soap_note_prompt + text

    # Use the most complete Llama format
    chat_template = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\nS: "
    prompt = chat_template.format(prompt=full_prompt)

    input_tokens = tokenizer.encode(prompt)
    print(f"Tokens in prompt: {len(input_tokens)}")

    print("\nGenerating SOAP note...")
    print("S: ", end='', flush=True)  # We already have "S: " in the prompt
    generated_text = _stream_generate(input_tokens, echo_guard=text[:20])

    # If that didn't work, try Method 2: Different prompt structure
    if len(generated_text.strip()) < 50 or text[:50] in generated_text:
        print("\n\nMethod 1 didn't work well. Trying alternative method...")

        # Try a simpler approach - maybe the model expects a different format
        simple_prompt = f"{soap_note_prompt}{text}\n\nSOAP Note:\nS: "
        input_tokens = tokenizer.encode(simple_prompt)

        print("\nGenerating with simplified format...")
        print("S: ", end='', flush=True)
        _stream_generate(input_tokens)

    print("\n--- Generation Complete ---")


if __name__ == "__main__":
    main()
```