Coverage for ai_integration/services/rag_service.py

1"""

2RAG (Retrieval-Augmented Generation) service

3Combines semantic search with AI response generation

4"""

5import logging

6import time

7from typing import List, Dict, Any, Optional

8from django.conf import settings

9import anthropic

10from .search_service import search_service

11from .embedding_service import embedding_service

# Module-level logger named after this module; used by RAGService to report
# failures in the search/generation pipeline.
logger = logging.getLogger(__name__)

class RAGService:
    """Service for RAG-based family knowledge queries.

    The pipeline in :meth:`generate_response` classifies the query by keyword,
    runs a semantic search, renders the hits into a text context, asks Claude
    for an answer (or picks a canned fallback when there is no context), and
    returns the answer together with formatted sources and metadata.
    """

    # Parameters of the Claude request, kept at class level so they can be
    # overridden by a subclass or on an instance without editing the method.
    AI_MODEL = "claude-3-sonnet-20240229"
    AI_MAX_TOKENS = 1000
    AI_TEMPERATURE = 0.7

    # Ordered (query_type, keywords) pairs. Order matters: the first group
    # with a substring hit in the lower-cased query wins.
    _QUERY_KEYWORDS = (
        ('health_pattern',
         ('health', 'medical', 'illness', 'disease', 'hereditary', 'genetic', '健康', '疾病', '遗传')),
        ('event_planning',
         ('celebration', 'party', 'reunion', 'birthday', 'wedding', '庆祝', '聚会', '生日')),
        ('cultural_heritage',
         ('tradition', 'heritage', 'recipe', 'values', 'wisdom', '传统', '文化', '智慧')),
        ('relationship_discovery',
         ('family', 'relative', 'relationship', 'cousin', '亲戚', '家人', '关系')),
        ('memory_discovery',
         ('story', 'memory', 'remember', 'childhood', 'past', '故事', '回忆', '童年')),
    )

    # Canned answers keyed by detected language, then by query type.
    _FALLBACK_RESPONSES = {
        'zh-CN': {
            'memory_discovery': "很抱歉，我在家庭记录中没有找到与您的问题直接相关的故事。不过，这可能是一个好机会来记录新的家庭记忆。您愿意分享一些相关的故事吗？",
            'health_pattern': "关于您询问的健康问题，我在现有的家庭健康记录中没有找到相关信息。建议您咨询专业医生，并考虑将重要的健康信息添加到家庭记录中。",
            'event_planning': "虽然我没有找到关于类似活动的具体记录，但我建议您可以创造新的家庭传统。考虑一下什么样的庆祝方式最能体现您家庭的价值观和喜好。",
            'cultural_heritage': "这是一个很好的问题！虽然我没有找到相关的传统记录，但这正是开始记录家庭文化传承的好时机。",
            'relationship_discovery': "关于家庭关系的问题，我建议您可以与长辈交流，了解更多家族史。同时，将这些珍贵的关系信息记录下来会很有价值。",
            'general': "很抱歉，我没有找到与您的问题直接相关的家庭信息。不过，我很乐意帮助您思考如何收集和记录相关信息。",
        },
        'en-US': {
            'memory_discovery': "I couldn't find specific family stories related to your question in our records. This might be a wonderful opportunity to capture new family memories. Would you like to share some related stories?",
            'health_pattern': "I don't have specific health information related to your question in our family records. I recommend consulting with healthcare professionals and considering adding important health information to your family records.",
            'event_planning': "While I don't have records of similar events, this could be a chance to create new family traditions. Consider what type of celebration would best reflect your family's values and preferences.",
            'cultural_heritage': "That's a wonderful question! While I don't have specific records about this tradition, this could be a perfect time to start documenting your family's cultural heritage.",
            'relationship_discovery': "For questions about family relationships, I suggest speaking with elder family members to learn more about your family history. Recording these precious connections would be very valuable.",
            'general': "I couldn't find information directly related to your question in our family records. However, I'd be happy to help you think about how to gather and record relevant information.",
        },
    }

    def __init__(self):
        """Bind the shared search/embedding services and create the Claude client."""
        self.search_service = search_service
        self.embedding_service = embedding_service
        # An unset or empty setting is passed as None rather than '' so the
        # Anthropic SDK can apply its own ANTHROPIC_API_KEY environment
        # fallback instead of sending an empty key.
        self.anthropic_client = anthropic.Anthropic(
            api_key=getattr(settings, 'ANTHROPIC_API_KEY', None) or None
        )

    def generate_response(
        self,
        query: str,
        max_results: int = 5,
        similarity_threshold: float = 0.6
    ) -> Dict[str, Any]:
        """
        Generate RAG response for family knowledge query

        Args:
            query: User's natural language query
            max_results: Maximum search results to include in context
            similarity_threshold: Minimum similarity for search results

        Returns:
            Dict with keys ``query``, ``response``, ``sources`` and
            ``metadata``. Any exception raised inside the pipeline is logged
            and converted into the dict built by ``_generate_error_response``
            instead of propagating to the caller.
        """
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # or jump if the wall clock is adjusted mid-request.
        start_time = time.perf_counter()

        try:
            # Step 1: Determine query type
            query_type = self._classify_query(query)

            # Step 2: Semantic search for relevant content
            # (a None result is treated the same as "no hits")
            search_results = self.search_service.semantic_search(
                query=query,
                limit=max_results,
                similarity_threshold=similarity_threshold
            ) or []

            # Step 3: Generate context from search results
            context = self._build_context(search_results, query_type)

            # Step 4: Generate AI response, or a canned reply without context
            if context:
                response_text = self._generate_ai_response(query, context, query_type)
            else:
                response_text = self._generate_fallback_response(query, query_type)

            # Step 5: Format response
            processing_time = time.perf_counter() - start_time

            return {
                'query': query,
                'response': response_text,
                'sources': self._format_sources(search_results),
                'metadata': {
                    'query_type': query_type,
                    'confidence': self._calculate_confidence(search_results),
                    'processing_time': round(processing_time, 2),
                    'sources_count': len(search_results),
                    'language': self._detect_language(query)
                }
            }

        except Exception as e:
            # Top-level boundary: keep the traceback in the log and hand the
            # caller a well-formed error payload.
            logger.exception("RAG generation failed: %s", e)
            return self._generate_error_response(query, str(e))

    def _classify_query(self, query: str) -> str:
        """Classify query type based on content.

        Matching is by case-insensitive substring against ``_QUERY_KEYWORDS``;
        groups are tried in order and the first hit wins. Returns ``'general'``
        when nothing matches.
        """
        query_lower = query.lower()

        for query_type, keywords in self._QUERY_KEYWORDS:
            if any(keyword in query_lower for keyword in keywords):
                return query_type

        return 'general'

    def _build_context(self, search_results: List[Dict], query_type: str) -> str:
        """Build context string from search results.

        Args:
            search_results: Result dicts; the keys read are ``content_type``,
                ``title``, ``content``, ``similarity`` plus type-specific
                extras (``people``, ``event_type``, ``location``,
                ``heritage_type``, ``origin_person``, ``person``,
                ``is_hereditary``).
            query_type: Accepted for interface compatibility; not used here.

        Returns:
            A newline-joined, numbered description of every result, or ``""``
            when there are no results.
        """
        if not search_results:
            return ""

        context_parts = ["Based on family records, here is relevant information:\n"]

        for i, result in enumerate(search_results, 1):
            content_type = result.get('content_type', 'unknown')
            title = result.get('title', 'Untitled')
            content = result.get('content', '')
            # A stored None would break the ':.2f' format below.
            similarity = result.get('similarity') or 0

            # Format based on content type
            if content_type == 'story':
                context_parts.append(f"{i}. Family Story: \"{title}\"")
                context_parts.append(f" Content: {content}")

                # Add people if available (at most three, coerced to text)
                people = result.get('people') or []
                if people:
                    names = ', '.join(str(person) for person in people[:3])
                    context_parts.append(f" People involved: {names}")

            elif content_type == 'event':
                context_parts.append(f"{i}. Family Event: \"{title}\"")
                context_parts.append(f" Description: {content}")

                # Add event details
                event_type = result.get('event_type', '')
                location = result.get('location', '')
                if event_type:
                    context_parts.append(f" Type: {event_type}")
                if location:
                    context_parts.append(f" Location: {location}")

            elif content_type == 'heritage':
                context_parts.append(f"{i}. Family Heritage: \"{title}\"")
                context_parts.append(f" Description: {content}")

                # Add heritage details
                heritage_type = result.get('heritage_type', '')
                origin_person = result.get('origin_person', '')
                if heritage_type:
                    context_parts.append(f" Type: {heritage_type}")
                if origin_person:
                    context_parts.append(f" Origin: {origin_person}")

            elif content_type == 'health':
                context_parts.append(f"{i}. Health Record: \"{title}\"")
                context_parts.append(f" Details: {content}")

                # Add health details
                person = result.get('person', '')
                is_hereditary = result.get('is_hereditary', False)
                if person:
                    context_parts.append(f" Person: {person}")
                if is_hereditary:
                    context_parts.append(" Hereditary: Yes")

            else:
                # Unrecognised types still carry a title and content; without
                # this branch they would contribute only a relevance line.
                context_parts.append(f"{i}. Family Record: \"{title}\"")
                context_parts.append(f" Content: {content}")

            context_parts.append(f" Relevance: {similarity:.2f}\n")

        return "\n".join(context_parts)

    def _generate_ai_response(self, query: str, context: str, query_type: str) -> str:
        """Generate AI response using Anthropic Claude.

        Sends the query plus rendered context as a single user message, with a
        system prompt chosen by ``query_type``. Any exception (API error,
        empty content list, ...) is logged and replaced by the canned fallback.

        NOTE(review): on failure the fallback text says no records were found
        even though ``context`` was non-empty; confirm this wording is
        acceptable for the API-failure case.
        """
        try:
            # Create system prompt based on query type
            system_prompt = self._get_system_prompt(query_type)

            # Create user message with context
            user_message = f"""Family Knowledge Query: {query}

{context}

Please provide a helpful, warm, and family-focused response based on the information above.
Speak as if you're a knowledgeable family member sharing precious memories and insights.
If the query is in Chinese, please respond in Chinese. Otherwise, respond in English.
"""

            # Generate response with Claude
            response = self.anthropic_client.messages.create(
                model=self.AI_MODEL,
                max_tokens=self.AI_MAX_TOKENS,
                temperature=self.AI_TEMPERATURE,
                system=system_prompt,
                messages=[
                    {"role": "user", "content": user_message}
                ]
            )

            return response.content[0].text

        except Exception as e:
            logger.exception("AI response generation failed: %s", e)
            return self._generate_fallback_response(query, query_type)

    def _get_system_prompt(self, query_type: str) -> str:
        """Get system prompt based on query type.

        Returns the shared base prompt followed by one type-specific sentence;
        unknown types use the ``'general'`` sentence.
        """
        base_prompt = """You are a wise and caring family knowledge keeper. You help family members
connect with their heritage, stories, and relationships. You speak with warmth, respect for
elders, and deep appreciation for family bonds."""

        type_specific = {
            'memory_discovery': " Focus on bringing family stories to life with vivid details and emotional context.",
            'health_pattern': " Provide thoughtful health insights while emphasizing the importance of professional medical advice.",
            'event_planning': " Suggest meaningful ways to celebrate that honor family traditions and create lasting memories.",
            'cultural_heritage': " Share insights about family traditions and values with deep respect for cultural heritage.",
            'relationship_discovery': " Help family members understand their connections and the importance of family bonds.",
            'general': " Provide helpful and family-focused guidance based on the available information."
        }

        return base_prompt + type_specific.get(query_type, type_specific['general'])

    def _generate_fallback_response(self, query: str, query_type: str) -> str:
        """Generate fallback response when no relevant content is found.

        The reply language follows ``_detect_language(query)``; an unknown
        ``query_type`` gets the ``'general'`` message.
        """
        language = self._detect_language(query)
        responses = self._FALLBACK_RESPONSES.get(
            language, self._FALLBACK_RESPONSES['en-US']
        )
        return responses.get(query_type, responses['general'])

    def _detect_language(self, query: str) -> str:
        """Simple language detection.

        Returns ``'zh-CN'`` when more than 30% of the characters fall in the
        U+4E00..U+9FFF range, otherwise ``'en-US'``. Non-string or empty input
        yields ``'en-US'`` so that the error-response path, which calls this
        with whatever the caller passed, can never raise from here.
        """
        if not isinstance(query, str) or not query:
            return 'en-US'

        # Check for Chinese characters
        chinese_chars = sum(1 for char in query if '\u4e00' <= char <= '\u9fff')
        if chinese_chars > len(query) * 0.3:  # More than 30% Chinese characters
            return 'zh-CN'
        return 'en-US'

    def _calculate_confidence(self, search_results: List[Dict]) -> float:
        """Calculate confidence score based on search results.

        The score is the mean similarity of the first three results plus a
        boost of 0.1 per result (capped at 0.2), clamped to ``[0.0, 1.0]``.
        Returns ``0.0`` for no results.
        """
        if not search_results:
            return 0.0

        # Average similarity of top 3 results (missing/None counts as 0)
        top_similarities = [r.get('similarity') or 0 for r in search_results[:3]]
        avg_similarity = sum(top_similarities) / len(top_similarities)

        # Boost confidence if we have multiple good results
        count_boost = min(len(search_results) * 0.1, 0.2)

        return max(0.0, min(avg_similarity + count_boost, 1.0))

    def _format_sources(self, search_results: List[Dict]) -> List[Dict]:
        """Format search results as sources.

        Each source has ``type``, ``id``, ``title`` and ``relevance``
        (similarity rounded to 3 places) plus fields specific to its
        content type.
        """
        sources = []

        for result in search_results:
            content_type = result.get('content_type')
            source = {
                'type': result.get('content_type', 'unknown'),
                'id': result.get('id'),
                'title': result.get('title', 'Untitled'),
                # None would make round() raise
                'relevance': round(result.get('similarity') or 0, 3)
            }

            # Add type-specific fields
            if content_type == 'story':
                source['story_type'] = result.get('story_type', '')
                source['people'] = (result.get('people') or [])[:2]  # Limit to 2 people
            elif content_type == 'event':
                source['event_type'] = result.get('event_type', '')
                source['date'] = result.get('start_date', '')
            elif content_type == 'heritage':
                source['heritage_type'] = result.get('heritage_type', '')
                source['importance'] = result.get('importance', '')
            elif content_type == 'health':
                source['person'] = result.get('person', '')
                source['is_hereditary'] = result.get('is_hereditary', False)

            sources.append(source)

        return sources

    def _generate_error_response(self, query: str, error: str) -> Dict[str, Any]:
        """Generate error response.

        Builds a payload with the same top-level shape as a successful
        response: a localized apology, no sources, zeroed metrics and the
        raw ``error`` text under ``metadata['error']``.

        NOTE(review): ``error`` is passed through verbatim; confirm it is not
        exposed to end users if exception text may contain internals.
        """
        language = self._detect_language(query)

        if language == 'zh-CN':
            error_message = "抱歉，处理您的问题时遇到了技术问题。请稍后再试，或者联系系统管理员。"
        else:
            error_message = "I'm sorry, but I encountered a technical issue while processing your question. Please try again later or contact the system administrator."

        return {
            'query': query,
            'response': error_message,
            'sources': [],
            'metadata': {
                'query_type': 'error',
                'confidence': 0.0,
                'processing_time': 0.0,
                'sources_count': 0,
                'language': language,
                'error': error
            }
        }

331

332

# Global service instance shared by importers of this module. Constructing it
# runs RAGService.__init__, so the Anthropic client is created at import time.
rag_service = RAGService()

Coverage for ai_integration/services/rag_service.py: 90%

148 statements