Spaces:
Sleeping
Sleeping
File size: 3,872 Bytes
266d7bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import asyncio
from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse
from src.api.models.api_models import (
AskRequest,
AskResponse,
AskStreamingResponse,
SearchResult,
UniqueTitleRequest,
UniqueTitleResponse,
)
from src.api.services.generation_service import generate_answer, get_streaming_function
from src.api.services.search_service import query_unique_titles, query_with_filters
router = APIRouter()
@router.post("/unique-titles", response_model=UniqueTitleResponse)
async def search_unique(request: Request, params: UniqueTitleRequest):
    """Return unique article titles matching a query and optional filters.

    Results are deduplicated by article title before being returned.

    Args:
        request: FastAPI request object.
        params: UniqueTitleRequest carrying the query text, optional
            feed/author/title filters, and a result limit.

    Returns:
        UniqueTitleResponse: List of unique titles.
    """
    # Collect the search arguments once so the call site stays compact.
    search_kwargs = {
        "query_text": params.query_text,
        "feed_author": params.feed_author,
        "feed_name": params.feed_name,
        "title_keywords": params.title_keywords,
        "limit": params.limit,
    }
    unique_titles = await query_unique_titles(request=request, **search_kwargs)
    return {"results": unique_titles}
@router.post("/ask", response_model=AskResponse)
async def ask_with_generation(request: Request, ask: AskRequest):
    """Non-streaming question-answering endpoint using vector search and LLM.

    Workflow:
        1. Retrieve relevant documents (possibly duplicate titles for richer context).
        2. Generate an answer with the selected LLM provider.

    Args:
        request: FastAPI request object.
        ask: AskRequest with query, provider, and limit.

    Returns:
        AskResponse: Generated answer and source documents.
    """
    # Step 1: pull context documents that match the query and filters.
    retrieved: list[SearchResult] = await query_with_filters(
        request,
        query_text=ask.query_text,
        feed_author=ask.feed_author,
        feed_name=ask.feed_name,
        title_keywords=ask.title_keywords,
        limit=ask.limit,
    )
    # Step 2: run the configured LLM over the retrieved contexts.
    generation = await generate_answer(
        query=ask.query_text,
        contexts=retrieved,
        provider=ask.provider,
        selected_model=ask.model,
    )
    # .get() without a default already yields None for missing keys.
    return AskResponse(
        query=ask.query_text,
        provider=ask.provider,
        answer=generation["answer"],
        sources=retrieved,
        model=generation.get("model"),
        finish_reason=generation.get("finish_reason"),
    )
@router.post("/ask/stream", response_model=AskStreamingResponse)
async def ask_with_generation_stream(request: Request, ask: AskRequest):
    """Streaming question-answering endpoint using vector search and LLM.

    Workflow:
        1. Retrieve relevant documents (possibly duplicate titles for richer context).
        2. Stream the generated answer from the selected LLM provider.

    Args:
        request: FastAPI request object.
        ask: AskRequest with query, provider, and limit.

    Returns:
        StreamingResponse: Yields text chunks as plain text.
    """
    # Step 1: pull context documents that match the query and filters.
    contexts: list[SearchResult] = await query_with_filters(
        request,
        query_text=ask.query_text,
        feed_author=ask.feed_author,
        feed_name=ask.feed_name,
        title_keywords=ask.title_keywords,
        limit=ask.limit,
    )
    # Step 2: obtain the provider-specific async streaming callable.
    stream = get_streaming_function(
        provider=ask.provider,
        query=ask.query_text,
        contexts=contexts,
        selected_model=ask.model,
    )

    # Step 3: relay chunks to the client as they arrive.
    async def relay():
        async for chunk in stream():
            yield chunk
            # Cooperative yield so other tasks on the event loop can run.
            await asyncio.sleep(0)

    return StreamingResponse(relay(), media_type="text/plain")
|