Coverage for ai_integration/services/rag_service.py: 90%
148 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 02:45 +0800
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-05 02:45 +0800
1"""
2RAG (Retrieval-Augmented Generation) service
3Combines semantic search with AI response generation
4"""
5import logging
6import time
7from typing import List, Dict, Any, Optional
8from django.conf import settings
9import anthropic
10from .search_service import search_service
11from .embedding_service import embedding_service
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class RAGService:
    """Service for RAG-based family knowledge queries.

    :meth:`generate_response` is the entry point. It classifies the query by
    keyword, runs a semantic search, renders the hits into a text context,
    asks Claude for an answer (or uses a canned fallback when there is no
    context), and returns the answer together with sources and metadata.
    """

    # Parameters of the Claude call. The model id may be overridden with an
    # optional Django setting named ANTHROPIC_MODEL, so switching models does
    # not require a code change; the values below are the historical defaults.
    DEFAULT_MODEL = "claude-3-sonnet-20240229"
    MAX_TOKENS = 1000
    TEMPERATURE = 0.7

    # Ordered (query_type, keywords) pairs. The first category that has a
    # keyword contained in the lower-cased query wins, so order is significant.
    _QUERY_KEYWORDS = (
        ('health_pattern',
         ('health', 'medical', 'illness', 'disease', 'hereditary', 'genetic', '健康', '疾病', '遗传')),
        ('event_planning',
         ('celebration', 'party', 'reunion', 'birthday', 'wedding', '庆祝', '聚会', '生日')),
        ('cultural_heritage',
         ('tradition', 'heritage', 'recipe', 'values', 'wisdom', '传统', '文化', '智慧')),
        ('relationship_discovery',
         ('family', 'relative', 'relationship', 'cousin', '亲戚', '家人', '关系')),
        ('memory_discovery',
         ('story', 'memory', 'remember', 'childhood', 'past', '故事', '回忆', '童年')),
    )

    def __init__(self):
        """Wire up the collaborating services and build the Anthropic client.

        The API key is read from ``settings.ANTHROPIC_API_KEY`` and defaults
        to an empty string when the setting is absent.
        """
        self.search_service = search_service
        self.embedding_service = embedding_service
        self.anthropic_client = anthropic.Anthropic(
            api_key=getattr(settings, 'ANTHROPIC_API_KEY', '')
        )

    def generate_response(
        self,
        query: str,
        max_results: int = 5,
        similarity_threshold: float = 0.6
    ) -> Dict[str, Any]:
        """
        Generate RAG response for family knowledge query

        Args:
            query: User's natural language query
            max_results: Maximum search results to include in context
            similarity_threshold: Minimum similarity for search results

        Returns:
            Dict with keys ``query``, ``response``, ``sources`` and
            ``metadata`` (``query_type``, ``confidence``, ``processing_time``
            in seconds rounded to 2 decimals, ``sources_count``,
            ``language``). Any exception is caught and reported through the
            dict built by ``_generate_error_response`` instead of propagating.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump if the wall clock is adjusted mid-request.
        start_time = time.perf_counter()

        try:
            # Step 1: Determine query type
            query_type = self._classify_query(query)

            # Step 2: Semantic search for relevant content
            search_results = self.search_service.semantic_search(
                query=query,
                limit=max_results,
                similarity_threshold=similarity_threshold
            )

            # Step 3: Generate context from search results
            context = self._build_context(search_results, query_type)

            # Step 4: Generate AI response (canned fallback without context)
            if context:
                response_text = self._generate_ai_response(query, context, query_type)
            else:
                response_text = self._generate_fallback_response(query, query_type)

            # Step 5: Format response
            processing_time = time.perf_counter() - start_time

            return {
                'query': query,
                'response': response_text,
                'sources': self._format_sources(search_results),
                'metadata': {
                    'query_type': query_type,
                    'confidence': self._calculate_confidence(search_results),
                    'processing_time': round(processing_time, 2),
                    'sources_count': len(search_results),
                    'language': self._detect_language(query)
                }
            }

        except Exception as e:
            # Service boundary: callers always receive a well-formed dict.
            # exc_info=True keeps the traceback in the log.
            logger.error("RAG generation failed: %s", e, exc_info=True)
            return self._generate_error_response(query, str(e))

    @staticmethod
    def _similarity_of(result: Dict[str, Any]) -> float:
        """Return the result's ``similarity`` as a float, or 0.0 if unusable.

        ``dict.get(key, default)`` only applies the default when the key is
        missing, so a key that is present with value ``None`` needs explicit
        handling before it is formatted, rounded or summed.
        """
        value = result.get('similarity')
        if value is None:
            return 0.0
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def _classify_query(self, query: str) -> str:
        """Classify query type based on content.

        Matching is a plain substring test against the lower-cased query, and
        categories are tried in the order listed in ``_QUERY_KEYWORDS``.

        Returns:
            One of 'health_pattern', 'event_planning', 'cultural_heritage',
            'relationship_discovery', 'memory_discovery', or 'general' when
            no keyword matches.
        """
        query_lower = query.lower()
        for query_type, keywords in self._QUERY_KEYWORDS:
            if any(keyword in query_lower for keyword in keywords):
                return query_type
        return 'general'

    def _build_context(self, search_results: List[Dict], query_type: str) -> str:
        """Build context string from search results.

        Args:
            search_results: Result dicts; the keys read are 'content_type',
                'title', 'content', 'similarity' plus type-specific details.
            query_type: Accepted for interface compatibility; not used here.

        Returns:
            A newline-joined text block, or "" when there are no results.
        """
        if not search_results:
            return ""

        # NOTE(review): detail lines below use the single leading space shown
        # in the reviewed text; the rendering may have collapsed a wider
        # indent - confirm against the original file.
        context_parts = ["Based on family records, here is relevant information:\n"]

        for i, result in enumerate(search_results, 1):
            content_type = result.get('content_type', 'unknown')
            title = result.get('title', 'Untitled')
            content = result.get('content', '')

            # Format based on content type
            if content_type == 'story':
                context_parts.append(f"{i}. Family Story: \"{title}\"")
                context_parts.append(f" Content: {content}")

                # `or []` also covers an explicit None stored under 'people'.
                people = result.get('people') or []
                if people:
                    names = ', '.join(str(person) for person in people[:3])
                    context_parts.append(f" People involved: {names}")

            elif content_type == 'event':
                context_parts.append(f"{i}. Family Event: \"{title}\"")
                context_parts.append(f" Description: {content}")

                event_type = result.get('event_type', '')
                location = result.get('location', '')
                if event_type:
                    context_parts.append(f" Type: {event_type}")
                if location:
                    context_parts.append(f" Location: {location}")

            elif content_type == 'heritage':
                context_parts.append(f"{i}. Family Heritage: \"{title}\"")
                context_parts.append(f" Description: {content}")

                heritage_type = result.get('heritage_type', '')
                origin_person = result.get('origin_person', '')
                if heritage_type:
                    context_parts.append(f" Type: {heritage_type}")
                if origin_person:
                    context_parts.append(f" Origin: {origin_person}")

            elif content_type == 'health':
                context_parts.append(f"{i}. Health Record: \"{title}\"")
                context_parts.append(f" Details: {content}")

                person = result.get('person', '')
                is_hereditary = result.get('is_hereditary', False)
                if person:
                    context_parts.append(f" Person: {person}")
                if is_hereditary:
                    context_parts.append(" Hereditary: Yes")

            else:
                # Unrecognised types are still listed as sources, so give the
                # model their title and content rather than only a bare
                # relevance line.
                context_parts.append(f"{i}. Family Record: \"{title}\"")
                context_parts.append(f" Content: {content}")

            context_parts.append(f" Relevance: {self._similarity_of(result):.2f}\n")

        return "\n".join(context_parts)

    def _generate_ai_response(self, query: str, context: str, query_type: str) -> str:
        """Generate AI response using Anthropic Claude.

        Args:
            query: The user's question.
            context: Text produced by ``_build_context``.
            query_type: Selects the system prompt and, on failure, the
                fallback text.

        Returns:
            The text of the first content block of Claude's reply. If
            anything in the call raises, the error is logged and the canned
            text from ``_generate_fallback_response`` is returned instead.
        """
        try:
            # Create system prompt based on query type
            system_prompt = self._get_system_prompt(query_type)

            # Create user message with context
            user_message = f"""Family Knowledge Query: {query}

{context}

Please provide a helpful, warm, and family-focused response based on the information above.
Speak as if you're a knowledgeable family member sharing precious memories and insights.
If the query is in Chinese, please respond in Chinese. Otherwise, respond in English.
"""

            # Generate response with Claude
            response = self.anthropic_client.messages.create(
                model=getattr(settings, 'ANTHROPIC_MODEL', self.DEFAULT_MODEL),
                max_tokens=self.MAX_TOKENS,
                temperature=self.TEMPERATURE,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": user_message}
                ]
            )

            return response.content[0].text

        except Exception as e:
            # Best effort: degrade to the canned reply, but keep the
            # traceback in the log so the failure can be diagnosed.
            logger.error("AI response generation failed: %s", e, exc_info=True)
            return self._generate_fallback_response(query, query_type)

    def _get_system_prompt(self, query_type: str) -> str:
        """Get system prompt based on query type.

        Returns the shared base prompt followed by a type-specific sentence;
        unknown query types use the 'general' sentence.
        """
        base_prompt = """You are a wise and caring family knowledge keeper. You help family members
connect with their heritage, stories, and relationships. You speak with warmth, respect for
elders, and deep appreciation for family bonds."""

        type_specific = {
            'memory_discovery': " Focus on bringing family stories to life with vivid details and emotional context.",
            'health_pattern': " Provide thoughtful health insights while emphasizing the importance of professional medical advice.",
            'event_planning': " Suggest meaningful ways to celebrate that honor family traditions and create lasting memories.",
            'cultural_heritage': " Share insights about family traditions and values with deep respect for cultural heritage.",
            'relationship_discovery': " Help family members understand their connections and the importance of family bonds.",
            'general': " Provide helpful and family-focused guidance based on the available information."
        }

        return base_prompt + type_specific.get(query_type, type_specific['general'])

    def _generate_fallback_response(self, query: str, query_type: str) -> str:
        """Generate fallback response when no relevant content is found.

        The language is chosen with ``_detect_language``; unknown query types
        use the 'general' message.
        """
        language = self._detect_language(query)

        if language == 'zh-CN':
            fallback_responses = {
                'memory_discovery': "很抱歉,我在家庭记录中没有找到与您的问题直接相关的故事。不过,这可能是一个好机会来记录新的家庭记忆。您愿意分享一些相关的故事吗?",
                'health_pattern': "关于您询问的健康问题,我在现有的家庭健康记录中没有找到相关信息。建议您咨询专业医生,并考虑将重要的健康信息添加到家庭记录中。",
                'event_planning': "虽然我没有找到关于类似活动的具体记录,但我建议您可以创造新的家庭传统。考虑一下什么样的庆祝方式最能体现您家庭的价值观和喜好。",
                'cultural_heritage': "这是一个很好的问题!虽然我没有找到相关的传统记录,但这正是开始记录家庭文化传承的好时机。",
                'relationship_discovery': "关于家庭关系的问题,我建议您可以与长辈交流,了解更多家族史。同时,将这些珍贵的关系信息记录下来会很有价值。",
                'general': "很抱歉,我没有找到与您的问题直接相关的家庭信息。不过,我很乐意帮助您思考如何收集和记录相关信息。"
            }
        else:
            fallback_responses = {
                'memory_discovery': "I couldn't find specific family stories related to your question in our records. This might be a wonderful opportunity to capture new family memories. Would you like to share some related stories?",
                'health_pattern': "I don't have specific health information related to your question in our family records. I recommend consulting with healthcare professionals and considering adding important health information to your family records.",
                'event_planning': "While I don't have records of similar events, this could be a chance to create new family traditions. Consider what type of celebration would best reflect your family's values and preferences.",
                'cultural_heritage': "That's a wonderful question! While I don't have specific records about this tradition, this could be a perfect time to start documenting your family's cultural heritage.",
                'relationship_discovery': "For questions about family relationships, I suggest speaking with elder family members to learn more about your family history. Recording these precious connections would be very valuable.",
                'general': "I couldn't find information directly related to your question in our family records. However, I'd be happy to help you think about how to gather and record relevant information."
            }

        return fallback_responses.get(query_type, fallback_responses['general'])

    def _detect_language(self, query: str) -> str:
        """Simple language detection.

        Returns 'zh-CN' when more than 30% of the characters of ``query``
        fall in the range U+4E00..U+9FFF, otherwise 'en-US' (including for
        an empty query).
        """
        chinese_chars = sum(1 for char in query if '\u4e00' <= char <= '\u9fff')
        if chinese_chars > len(query) * 0.3:
            return 'zh-CN'
        return 'en-US'

    def _calculate_confidence(self, search_results: List[Dict]) -> float:
        """Calculate confidence score based on search results.

        The score is the mean similarity of the first three results plus a
        boost of 0.1 per result (capped at 0.2), clamped to [0.0, 1.0].
        Returns 0.0 when there are no results.
        """
        if not search_results:
            return 0.0

        # Average similarity of top 3 results
        top_similarities = [self._similarity_of(r) for r in search_results[:3]]
        avg_similarity = sum(top_similarities) / len(top_similarities)

        # Boost grows with the number of results, up to 0.2
        count_boost = min(len(search_results) * 0.1, 0.2)

        # Lower clamp guards against negative similarity scores.
        return max(0.0, min(avg_similarity + count_boost, 1.0))

    def _format_sources(self, search_results: List[Dict]) -> List[Dict]:
        """Format search results as sources.

        Each source carries 'type', 'id', 'title' and 'relevance' (rounded to
        3 decimals), plus a few fields that depend on the content type.
        """
        sources = []

        for result in search_results:
            content_type = result.get('content_type')
            source = {
                'type': result.get('content_type', 'unknown'),
                'id': result.get('id'),
                'title': result.get('title', 'Untitled'),
                'relevance': round(self._similarity_of(result), 3)
            }

            # Add type-specific fields
            if content_type == 'story':
                source['story_type'] = result.get('story_type', '')
                # `or []` also covers an explicit None; limit to 2 people.
                source['people'] = (result.get('people') or [])[:2]
            elif content_type == 'event':
                source['event_type'] = result.get('event_type', '')
                source['date'] = result.get('start_date', '')
            elif content_type == 'heritage':
                source['heritage_type'] = result.get('heritage_type', '')
                source['importance'] = result.get('importance', '')
            elif content_type == 'health':
                source['person'] = result.get('person', '')
                source['is_hereditary'] = result.get('is_hereditary', False)

            sources.append(source)

        return sources

    def _generate_error_response(self, query: str, error: str) -> Dict[str, Any]:
        """Generate error response.

        Returns a dict with the same top-level keys as a successful
        response, a localized apology as 'response', no sources, and the
        error text under ``metadata['error']``.
        """
        language = self._detect_language(query)

        if language == 'zh-CN':
            error_message = "抱歉,处理您的问题时遇到了技术问题。请稍后再试,或者联系系统管理员。"
        else:
            error_message = "I'm sorry, but I encountered a technical issue while processing your question. Please try again later or contact the system administrator."

        return {
            'query': query,
            'response': error_message,
            'sources': [],
            'metadata': {
                'query_type': 'error',
                'confidence': 0.0,
                'processing_time': 0.0,
                'sources_count': 0,
                'language': language,
                'error': error
            }
        }
# Global service instance.
# Created when this module is imported; RAGService.__init__ builds the
# Anthropic client at that point, reading ANTHROPIC_API_KEY from settings.
rag_service = RAGService()