Coverage for ai_integration/services/rag_service.py: 90%

148 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-05 02:45 +0800

1""" 

2RAG (Retrieval-Augmented Generation) service 

3Combines semantic search with AI response generation 

4""" 

5import logging 

6import time 

7from typing import List, Dict, Any, Optional 

8from django.conf import settings 

9import anthropic 

10from .search_service import search_service 

11from .embedding_service import embedding_service 

12 

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

14 

15 

class RAGService:
    """Service for RAG-based family knowledge queries.

    A query is classified by keyword, matched against stored content via the
    search service, and the retrieved records are rendered into a text
    context that is sent to Anthropic Claude. When no context is available,
    or the AI call fails, a canned fallback answer is returned instead.
    """

    # Claude request parameters. Kept as class attributes so they can be
    # overridden in a subclass or test without editing the request code.
    MODEL = "claude-3-sonnet-20240229"
    MAX_TOKENS = 1000
    TEMPERATURE = 0.7

    # Ordered (query_type, keywords) pairs. Order matters: the first category
    # with a keyword contained in the lowercased query wins.
    QUERY_TYPE_KEYWORDS = (
        ('health_pattern',
         ('health', 'medical', 'illness', 'disease', 'hereditary', 'genetic', '健康', '疾病', '遗传')),
        ('event_planning',
         ('celebration', 'party', 'reunion', 'birthday', 'wedding', '庆祝', '聚会', '生日')),
        ('cultural_heritage',
         ('tradition', 'heritage', 'recipe', 'values', 'wisdom', '传统', '文化', '智慧')),
        ('relationship_discovery',
         ('family', 'relative', 'relationship', 'cousin', '亲戚', '家人', '关系')),
        ('memory_discovery',
         ('story', 'memory', 'remember', 'childhood', 'past', '故事', '回忆', '童年')),
    )

    def __init__(self):
        """Bind the shared search/embedding services and create the Claude client.

        The API key is read from Django settings (``ANTHROPIC_API_KEY``) and
        defaults to an empty string when the setting is absent.
        """
        self.search_service = search_service
        self.embedding_service = embedding_service
        self.anthropic_client = anthropic.Anthropic(
            api_key=getattr(settings, 'ANTHROPIC_API_KEY', '')
        )

    def generate_response(
        self,
        query: str,
        max_results: int = 5,
        similarity_threshold: float = 0.6
    ) -> Dict[str, Any]:
        """
        Generate RAG response for family knowledge query

        Args:
            query: User's natural language query
            max_results: Maximum search results to include in context
            similarity_threshold: Minimum similarity for search results

        Returns:
            Dict with keys ``query``, ``response``, ``sources`` and
            ``metadata`` (query type, confidence, processing time in
            seconds, source count, detected language). Any exception is
            caught and converted into the payload built by
            ``_generate_error_response``; this method does not raise.
        """
        start_time = time.time()

        try:
            # Step 1: Determine query type
            query_type = self._classify_query(query)

            # Step 2: Semantic search for relevant content
            search_results = self.search_service.semantic_search(
                query=query,
                limit=max_results,
                similarity_threshold=similarity_threshold
            )

            # Step 3: Generate context from search results
            context = self._build_context(search_results, query_type)

            # Step 4: Generate AI response (canned fallback when there is
            # nothing to ground the answer on)
            if context:
                response_text = self._generate_ai_response(query, context, query_type)
            else:
                response_text = self._generate_fallback_response(query, query_type)

            # Step 5: Format response
            processing_time = time.time() - start_time

            return {
                'query': query,
                'response': response_text,
                'sources': self._format_sources(search_results),
                'metadata': {
                    'query_type': query_type,
                    'confidence': self._calculate_confidence(search_results),
                    'processing_time': round(processing_time, 2),
                    'sources_count': len(search_results),
                    'language': self._detect_language(query)
                }
            }

        except Exception as e:
            # logger.exception records the traceback, which logger.error
            # with only the message would lose.
            logger.exception("RAG generation failed: %s", e)
            return self._generate_error_response(query, str(e))

    def _classify_query(self, query: str) -> str:
        """Classify query type based on content.

        Matching is a case-insensitive substring test against
        ``QUERY_TYPE_KEYWORDS``; the first matching category wins and
        ``'general'`` is returned when nothing matches.
        """
        query_lower = query.lower()

        for query_type, keywords in self.QUERY_TYPE_KEYWORDS:
            if any(keyword in query_lower for keyword in keywords):
                return query_type

        return 'general'

    def _build_context(self, search_results: List[Dict], query_type: str) -> str:
        """Build context string from search results.

        Args:
            search_results: Result dicts; the keys read are ``content_type``,
                ``title``, ``content``, ``similarity`` plus type-specific
                extras (``people``, ``event_type``, ``location``,
                ``heritage_type``, ``origin_person``, ``person``,
                ``is_hereditary``).
            query_type: Accepted for interface compatibility; not used when
                rendering the context.

        Returns:
            A newline-joined, numbered description of every result, or an
            empty string when there are no results.
        """
        if not search_results:
            return ""

        context_parts = ["Based on family records, here is relevant information:\n"]

        for i, result in enumerate(search_results, 1):
            content_type = result.get('content_type', 'unknown')
            title = result.get('title', 'Untitled')
            content = result.get('content', '')
            # A present-but-None similarity would break the :.2f format below.
            similarity = result.get('similarity') or 0

            # Format based on content type
            if content_type == 'story':
                context_parts.append(f"{i}. Family Story: \"{title}\"")
                context_parts.append(f" Content: {content}")

                # Add people if available (at most three, tolerating None
                # and non-string entries)
                people = result.get('people') or []
                if people:
                    names = ', '.join(str(person) for person in people[:3])
                    context_parts.append(f" People involved: {names}")

            elif content_type == 'event':
                context_parts.append(f"{i}. Family Event: \"{title}\"")
                context_parts.append(f" Description: {content}")

                # Add event details
                event_type = result.get('event_type', '')
                location = result.get('location', '')
                if event_type:
                    context_parts.append(f" Type: {event_type}")
                if location:
                    context_parts.append(f" Location: {location}")

            elif content_type == 'heritage':
                context_parts.append(f"{i}. Family Heritage: \"{title}\"")
                context_parts.append(f" Description: {content}")

                # Add heritage details
                heritage_type = result.get('heritage_type', '')
                origin_person = result.get('origin_person', '')
                if heritage_type:
                    context_parts.append(f" Type: {heritage_type}")
                if origin_person:
                    context_parts.append(f" Origin: {origin_person}")

            elif content_type == 'health':
                context_parts.append(f"{i}. Health Record: \"{title}\"")
                context_parts.append(f" Details: {content}")

                # Add health details
                person = result.get('person', '')
                is_hereditary = result.get('is_hereditary', False)
                if person:
                    context_parts.append(f" Person: {person}")
                if is_hereditary:
                    context_parts.append(" Hereditary: Yes")

            else:
                # Unrecognised content types used to contribute only a bare
                # relevance line; keep their title and content so the
                # retrieved text is not silently dropped from the prompt.
                context_parts.append(f"{i}. Record: \"{title}\"")
                context_parts.append(f" Content: {content}")

            context_parts.append(f" Relevance: {similarity:.2f}\n")

        return "\n".join(context_parts)

    def _generate_ai_response(self, query: str, context: str, query_type: str) -> str:
        """Generate AI response using Anthropic Claude.

        Args:
            query: The user's original question.
            context: Rendered search results from ``_build_context``.
            query_type: Category used to pick the system prompt.

        Returns:
            The text of Claude's reply, or the canned fallback response when
            the call raises or the reply contains no text.
        """
        try:
            # Create system prompt based on query type
            system_prompt = self._get_system_prompt(query_type)

            # Create user message with context
            user_message = (
                f"Family Knowledge Query: {query}\n"
                "\n"
                f"{context}\n"
                "\n"
                "Please provide a helpful, warm, and family-focused response based on the information above. \n"
                "Speak as if you're a knowledgeable family member sharing precious memories and insights.\n"
                "If the query is in Chinese, please respond in Chinese. Otherwise, respond in English.\n"
            )

            # Generate response with Claude
            response = self.anthropic_client.messages.create(
                model=self.MODEL,
                max_tokens=self.MAX_TOKENS,
                temperature=self.TEMPERATURE,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": user_message}
                ]
            )

            # Collect the text of every content block instead of indexing
            # content[0], which raised IndexError on an empty reply.
            response_text = "".join(
                getattr(block, 'text', '') or '' for block in response.content
            )
            if not response_text.strip():
                logger.warning("AI response contained no text; using fallback response")
                return self._generate_fallback_response(query, query_type)

            return response_text

        except Exception as e:
            logger.exception("AI response generation failed: %s", e)
            return self._generate_fallback_response(query, query_type)

    def _get_system_prompt(self, query_type: str) -> str:
        """Get system prompt based on query type.

        Returns the shared base prompt followed by one sentence specific to
        ``query_type``; unknown types get the ``'general'`` sentence.
        """
        base_prompt = (
            "You are a wise and caring family knowledge keeper. You help family members \n"
            "connect with their heritage, stories, and relationships. You speak with warmth, respect for \n"
            "elders, and deep appreciation for family bonds."
        )

        type_specific = {
            'memory_discovery': " Focus on bringing family stories to life with vivid details and emotional context.",
            'health_pattern': " Provide thoughtful health insights while emphasizing the importance of professional medical advice.",
            'event_planning': " Suggest meaningful ways to celebrate that honor family traditions and create lasting memories.",
            'cultural_heritage': " Share insights about family traditions and values with deep respect for cultural heritage.",
            'relationship_discovery': " Help family members understand their connections and the importance of family bonds.",
            'general': " Provide helpful and family-focused guidance based on the available information."
        }

        return base_prompt + type_specific.get(query_type, type_specific['general'])

    def _generate_fallback_response(self, query: str, query_type: str) -> str:
        """Generate fallback response when no relevant content is found.

        The message is chosen by the detected query language (Chinese or
        English) and by ``query_type``; unknown types get the ``'general'``
        message.
        """
        language = self._detect_language(query)

        if language == 'zh-CN':
            fallback_responses = {
                'memory_discovery': "很抱歉,我在家庭记录中没有找到与您的问题直接相关的故事。不过,这可能是一个好机会来记录新的家庭记忆。您愿意分享一些相关的故事吗?",
                'health_pattern': "关于您询问的健康问题,我在现有的家庭健康记录中没有找到相关信息。建议您咨询专业医生,并考虑将重要的健康信息添加到家庭记录中。",
                'event_planning': "虽然我没有找到关于类似活动的具体记录,但我建议您可以创造新的家庭传统。考虑一下什么样的庆祝方式最能体现您家庭的价值观和喜好。",
                'cultural_heritage': "这是一个很好的问题!虽然我没有找到相关的传统记录,但这正是开始记录家庭文化传承的好时机。",
                'relationship_discovery': "关于家庭关系的问题,我建议您可以与长辈交流,了解更多家族史。同时,将这些珍贵的关系信息记录下来会很有价值。",
                'general': "很抱歉,我没有找到与您的问题直接相关的家庭信息。不过,我很乐意帮助您思考如何收集和记录相关信息。"
            }
        else:
            fallback_responses = {
                'memory_discovery': "I couldn't find specific family stories related to your question in our records. This might be a wonderful opportunity to capture new family memories. Would you like to share some related stories?",
                'health_pattern': "I don't have specific health information related to your question in our family records. I recommend consulting with healthcare professionals and considering adding important health information to your family records.",
                'event_planning': "While I don't have records of similar events, this could be a chance to create new family traditions. Consider what type of celebration would best reflect your family's values and preferences.",
                'cultural_heritage': "That's a wonderful question! While I don't have specific records about this tradition, this could be a perfect time to start documenting your family's cultural heritage.",
                'relationship_discovery': "For questions about family relationships, I suggest speaking with elder family members to learn more about your family history. Recording these precious connections would be very valuable.",
                'general': "I couldn't find information directly related to your question in our family records. However, I'd be happy to help you think about how to gather and record relevant information."
            }

        return fallback_responses.get(query_type, fallback_responses['general'])

    def _detect_language(self, query: str) -> str:
        """Simple language detection.

        Returns ``'zh-CN'`` when more than 30% of the characters fall in the
        U+4E00..U+9FFF range, otherwise ``'en-US'``. Empty or non-string
        input yields ``'en-US'`` rather than raising, because this method is
        also called while building the error response.
        """
        if not isinstance(query, str) or not query:
            return 'en-US'

        # Check for Chinese characters
        chinese_chars = sum(1 for char in query if '\u4e00' <= char <= '\u9fff')
        if chinese_chars > len(query) * 0.3:  # More than 30% Chinese characters
            return 'zh-CN'
        return 'en-US'

    def _calculate_confidence(self, search_results: List[Dict]) -> float:
        """Calculate confidence score based on search results.

        The score is the mean similarity of the top three results plus a
        boost of 0.1 per result (capped at 0.2), clamped to ``[0.0, 1.0]``.
        Missing or ``None`` similarities count as 0.
        """
        if not search_results:
            return 0.0

        # Average similarity of top 3 results
        top_similarities = [r.get('similarity') or 0 for r in search_results[:3]]
        avg_similarity = sum(top_similarities) / len(top_similarities)

        # Boost confidence if we have multiple good results
        count_boost = min(len(search_results) * 0.1, 0.2)

        # Clamp on both sides: negative similarities must not produce a
        # negative confidence.
        return max(0.0, min(avg_similarity + count_boost, 1.0))

    def _format_sources(self, search_results: List[Dict]) -> List[Dict]:
        """Format search results as sources.

        Each source carries ``type``, ``id``, ``title`` and ``relevance``
        (similarity rounded to three decimals) plus a few fields specific to
        its content type.
        """
        sources = []

        for result in search_results:
            content_type = result.get('content_type')
            source = {
                'type': result.get('content_type', 'unknown'),
                'id': result.get('id'),
                'title': result.get('title', 'Untitled'),
                # round(None) would raise, so treat None like a missing value.
                'relevance': round(result.get('similarity') or 0, 3)
            }

            # Add type-specific fields
            if content_type == 'story':
                source['story_type'] = result.get('story_type', '')
                # Limit to 2 people; a None value must not break the slice.
                source['people'] = (result.get('people') or [])[:2]
            elif content_type == 'event':
                source['event_type'] = result.get('event_type', '')
                source['date'] = result.get('start_date', '')
            elif content_type == 'heritage':
                source['heritage_type'] = result.get('heritage_type', '')
                source['importance'] = result.get('importance', '')
            elif content_type == 'health':
                source['person'] = result.get('person', '')
                source['is_hereditary'] = result.get('is_hereditary', False)

            sources.append(source)

        return sources

    def _generate_error_response(self, query: str, error: str) -> Dict[str, Any]:
        """Generate error response.

        Args:
            query: The query that failed; echoed back unchanged.
            error: Error description, exposed under ``metadata['error']``.

        Returns:
            A payload with the same top-level keys as a successful response,
            an apology in the detected language, no sources, and
            ``query_type`` set to ``'error'``.
        """
        language = self._detect_language(query)

        if language == 'zh-CN':
            error_message = "抱歉,处理您的问题时遇到了技术问题。请稍后再试,或者联系系统管理员。"
        else:
            error_message = "I'm sorry, but I encountered a technical issue while processing your question. Please try again later or contact the system administrator."

        return {
            'query': query,
            'response': error_message,
            'sources': [],
            'metadata': {
                'query_type': 'error',
                'confidence': 0.0,
                'processing_time': 0.0,
                'sources_count': 0,
                'language': language,
                'error': error
            }
        }

331 

332 

# Global service instance, created once when this module is imported.
# Constructing it runs RAGService.__init__, which builds the Anthropic client.
rag_service = RAGService()