|
17 | 17 | """Docs Agent"""
|
18 | 18 |
|
19 | 19 | import typing
|
| 20 | +import os, pathlib |
20 | 21 |
|
21 | 22 | from absl import logging
|
22 | 23 | import google.api_core
|
@@ -573,6 +574,73 @@ def ask_content_model_to_fact_check_prompt(self, context: str, prev_response: st
|
573 | 574 | def generate_embedding(self, text, task_type: str = "SEMANTIC_SIMILARITY"):
|
574 | 575 | return self.gemini.embed(text, task_type)[0]
|
575 | 576 |
|
| 577 | +    # Ask the model to respond to a prompt about an image. |
| 578 | + def ask_model_about_image(self, prompt: str, image): |
| 579 | + if not prompt: |
| 580 | +            prompt = "Describe this image:" |
| 581 | + if self.context_model.startswith("models/gemini-1.5"): |
| 582 | + try: |
| 583 | + # Adding prompt in the beginning allows long contextual |
| 584 | + # information to be added. |
| 585 | + response = self.gemini.generate_content([prompt, image]) |
| 586 | + except google.api_core.exceptions.InvalidArgument: |
| 587 | + return self.config.conditions.model_error_message |
| 588 | + else: |
| 589 | + logging.error(f"The {self.context_model} can't read an image.") |
| 590 | + response = None |
| 591 | + exit(1) |
| 592 | + return response |
| 593 | + |
| 594 | +    # Ask the model to respond to a prompt about an audio clip. |
| 595 | + def ask_model_about_audio(self, prompt: str, audio): |
| 596 | + if not prompt: |
| 597 | +            prompt = "Describe this audio clip:" |
| 598 | + audio_size = os.path.getsize(audio) |
| 599 | + # Limit is 20MB |
| 600 | + if audio_size > 20000000: |
| 601 | + logging.error(f"The audio clip {audio} is too large: {audio_size} bytes.") |
| 602 | + exit(1) |
| 603 | + # Get the mime type of the audio file and trim the . from the extension. |
| 604 | +        mime_type = "audio/" + pathlib.Path(audio).suffix[1:] |
| 605 | + audio_clip = { |
| 606 | + "mime_type": mime_type, |
| 607 | + "data": pathlib.Path(audio).read_bytes() |
| 608 | + } |
| 609 | + if self.context_model.startswith("models/gemini-1.5"): |
| 610 | + try: |
| 611 | + response = self.gemini.generate_content([prompt, audio_clip]) |
| 612 | + except google.api_core.exceptions.InvalidArgument: |
| 613 | + return self.config.conditions.model_error_message |
| 614 | + else: |
| 615 | + logging.error(f"The {self.context_model} can't read an audio clip.") |
| 616 | + exit(1) |
| 617 | + return response |
| 618 | + |
| 619 | +    # Ask the model to respond to a prompt about a video clip. |
| 620 | + def ask_model_about_video(self, prompt: str, video): |
| 621 | + if not prompt: |
| 622 | +            prompt = "Describe this video clip:" |
| 623 | + video_size = os.path.getsize(video) |
| 624 | + # Limit is 2GB |
| 625 | + if video_size > 2147483648: |
| 626 | + logging.error(f"The video clip {video} is too large: {video_size} bytes.") |
| 627 | + exit(1) |
| 628 | + request_options = { |
| 629 | + "timeout": 600 |
| 630 | + } |
| 631 | +        mime_type = "video/" + pathlib.Path(video).suffix[1:] |
| 632 | +        video_clip_uploaded = self.gemini.upload_file(video) |
| 633 | + video_clip = self.gemini.get_file(video_clip_uploaded) |
| 634 | + if self.context_model.startswith("models/gemini-1.5"): |
| 635 | + try: |
| 636 | + response = self.gemini.generate_content([prompt, video_clip], |
| 637 | + request_options=request_options) |
| 638 | + except google.api_core.exceptions.InvalidArgument: |
| 639 | + return self.config.conditions.model_error_message |
| 640 | + else: |
| 641 | + logging.error(f"The {self.context_model} can't see video clips.") |
| 642 | + exit(1) |
| 643 | + return response |
576 | 644 |
|
577 | 645 | # Function to give an embedding function for gemini using an API key
|
578 | 646 | def embedding_function_gemini_retrieval(api_key, embedding_model: str):
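
For reference, here is a minimal usage sketch of the three new multimodal helpers. It assumes the class defined in this file is instantiated as docs_agent with a Gemini 1.5 context model configured; the DocsAgent(config) constructor call, the file names, and the PIL import are illustrative assumptions, not part of this change.

    import PIL.Image

    docs_agent = DocsAgent(config)  # hypothetical setup; see the rest of docs_agent.py

    # Images are passed in as already-loaded objects (for example, a PIL image).
    diagram = PIL.Image.open("diagram.png")
    print(docs_agent.ask_model_about_image("What does this diagram show?", diagram))

    # Audio and video are passed as file paths; the helpers check size limits,
    # read or upload the files, and exit if the model is not a Gemini 1.5 model.
    print(docs_agent.ask_model_about_audio("Summarize this recording.", "meeting.mp3"))
    print(docs_agent.ask_model_about_video("List the steps shown in this clip.", "demo.mp4"))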
|
|