alberto committed on
Commit
7e1bf0b
·
1 Parent(s): 0d0af81

tool improvement, smaller model

Browse files
Files changed (4) hide show
  1. app.py +16 -23
  2. requirements.txt +1 -0
  3. system_prompt.txt +3 -7
  4. tools.py +125 -24
app.py CHANGED
@@ -4,15 +4,12 @@ import requests
4
  import inspect
5
  import pandas as pd
6
  from smolagents import (
7
- VisitWebpageTool,
8
  DuckDuckGoSearchTool,
9
- WikipediaSearchTool,
10
  PythonInterpreterTool,
11
  FinalAnswerTool,
12
- TransformersModel,
13
  InferenceClientModel)
14
  from smolagents.agents import CodeAgent
15
- from tools import VisitWikiPageTool, SpeechToTextTool
16
  from utils import QuestionLoader
17
 
18
  with open('system_prompt.txt', 'r') as file:
@@ -23,18 +20,26 @@ with open('system_prompt.txt', 'r') as file:
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
  # Models
26
- MODEL = InferenceClientModel("Qwen/Qwen3-VL-30B-A3B-Instruct")
 
27
 
28
  # --- Basic Agent Definition ---
29
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
30
  class BasicAgent:
31
  def __init__(self):
32
- print("BasicAgent initialized.")
33
- def run(self, task: str, **kwargs) -> str:
34
- print(f"Agent received question (first 50 chars): {task[:50]}...")
35
- fixed_answer = "This is a default answer."
36
- print(f"Agent returning fixed answer: {fixed_answer}")
37
- return fixed_answer
 
 
 
 
 
 
 
38
 
39
 
40
 
@@ -61,18 +66,6 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
61
  # 1. Instantiate Agent ( modify this part to create your agent)
62
  try:
63
  agent = BasicAgent()
64
- agent = CodeAgent(
65
- model=MODEL,
66
- max_steps=10,
67
- tools=[
68
- DuckDuckGoSearchTool(),
69
- WikipediaSearchTool(),
70
- VisitWebpageTool(),
71
- VisitWikiPageTool(user_agent="hf-agent-course"),
72
- SpeechToTextTool(),
73
- PythonInterpreterTool(),
74
- FinalAnswerTool()])
75
- agent.prompt_templates["system_prompt"] = system_prompt
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
78
  return f"Error initializing agent: {e}", None
 
4
  import inspect
5
  import pandas as pd
6
  from smolagents import (
 
7
  DuckDuckGoSearchTool,
 
8
  PythonInterpreterTool,
9
  FinalAnswerTool,
 
10
  InferenceClientModel)
11
  from smolagents.agents import CodeAgent
12
+ from tools import VisitWebpageTool, SpeechToTextTool
13
  from utils import QuestionLoader
14
 
15
  with open('system_prompt.txt', 'r') as file:
 
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
  # Models
23
+ #MODEL = InferenceClientModel("Qwen/Qwen3-VL-30B-A3B-Instruct")
24
+ MODEL = InferenceClientModel("Qwen/Qwen3-VL-7B-Instruct")
25
 
26
  # --- Basic Agent Definition ---
27
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
28
  class BasicAgent:
29
  def __init__(self):
30
+ self.agent = CodeAgent(
31
+ model=MODEL,
32
+ max_steps=10,
33
+ tools=[
34
+ DuckDuckGoSearchTool(),
35
+ VisitWebpageTool(),
36
+ SpeechToTextTool(),
37
+ PythonInterpreterTool(),
38
+ FinalAnswerTool()])
39
+ self.agent.prompt_templates["system_prompt"] = system_prompt
40
+
41
+ def run(self, **kwargs) -> str:
42
+ return self.agent.run(**kwargs)
43
 
44
 
45
 
 
66
  # 1. Instantiate Agent ( modify this part to create your agent)
67
  try:
68
  agent = BasicAgent()
 
 
 
 
 
 
 
 
 
 
 
 
69
  except Exception as e:
70
  print(f"Error instantiating agent: {e}")
71
  return f"Error initializing agent: {e}", None
requirements.txt CHANGED
@@ -13,3 +13,4 @@ pandas
13
  torch==2.9
14
  torchaudio
15
  torchcodec
 
 
13
  torch==2.9
14
  torchaudio
15
  torchcodec
16
+ markdown-it-py==4.0.0
system_prompt.txt CHANGED
@@ -1,4 +1,4 @@
1
- You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
2
  To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
3
  To solve the task, you must plan forward to proceed in a series of steps, in a cycle of Thought, Code, and Observation sequences.
4
 
@@ -172,13 +172,9 @@ specified otherwise.
172
  15. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in
173
  the list is a number or a string.
174
  16. Skip questions related to youtube videos since you do not have the tools to answer. Just answer 'Skip' in such cases.
 
 
175
 
176
- Here are suggestions, these are not rules, so you may decide to not follow them, however, they can make solving tasks easier:
177
- 1. Some questions are related to a file, use the 'get_question_file' tool to retrieve the question's file content.
178
- 2. The 'wikipedia_search' tool often returns incomplete results and is not able to parse tables. In order to get the full content of a wikipedia pages
179
- you should rely on the 'visit_wikipage' tool.
180
- 3. To visit wikipedia pages you should use the 'visit_wikipage' tool and NOT the generic 'visit_webpage' tool since the latter won't work.
181
- 4. Instead of using regex or code instruction to extract information from text it you are often better of relying on your own text understading capabilities.
182
 
183
  {%- if custom_instructions %}
184
  {{custom_instructions}}
 
1
+ You are an expert assistant who can solve any task. You will be given a task to solve as best you can, you can also use code if you need to.
2
  To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
3
  To solve the task, you must plan forward to proceed in a series of steps, in a cycle of Thought, Code, and Observation sequences.
4
 
 
172
  15. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in
173
  the list is a number or a string.
174
  16. Skip questions related to youtube videos since you do not have the tools to answer. Just answer 'Skip' in such cases.
175
+ 17. Wikipedia is your go-to website for facts and information retrieval.
176
+ 18. Don't over-use or over-rely on code to solve tasks; use it only when necessary.
177
 
 
 
 
 
 
 
178
 
179
  {%- if custom_instructions %}
180
  {{custom_instructions}}
tools.py CHANGED
@@ -1,39 +1,51 @@
1
  from typing import Dict
2
  from transformers import pipeline
 
3
  from smolagents.tools import Tool
4
  import torchcodec
5
 
6
 
7
- class VisitWikiPageTool(Tool):
8
- name = "visit_wikipage"
9
  description = (
10
- "Visits a Wikipedia page at the given url and reads its content as a markdown string. Use this to browse Wikipedia wepages and get their full content."
11
  )
12
  inputs = {
13
  "url": {
14
  "type": "string",
15
  "description": "The url of the webpage to visit.",
16
  },
17
- "max_length": {
18
- "type": "integer",
19
- "description": "Maximum number of characters to include in the response. Default 40000.",
20
- "nullable": True
21
- }
22
  }
23
  output_type = "string"
24
 
25
- def __init__(self, user_agent: str):
 
 
 
 
26
  super().__init__()
 
27
  self.headers = {"User-Agent": user_agent}
28
 
29
- def _truncate_content(self, content: str, max_length: int) -> str:
30
- if len(content) <= max_length:
31
- return content
32
- return (
33
- content[:max_length] + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
34
- )
35
 
36
- def forward(self, url: str, max_length: int = 40000) -> str:
 
 
 
 
 
 
 
 
 
 
 
37
  try:
38
  import re
39
  import requests
@@ -50,11 +62,16 @@ class VisitWikiPageTool(Tool):
50
 
51
  # Convert the HTML content to Markdown
52
  markdown_content = markdownify(response.text).strip()
53
- max_length = max_length if max_length is not None else 40000
54
  # Remove multiple line breaks
55
  markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
56
- return self._truncate_content(markdown_content, max_length)
57
-
 
 
 
 
 
58
  except requests.exceptions.Timeout:
59
  return "The request timed out. Please try again later or check the URL."
60
  except RequestException as e:
@@ -62,6 +79,7 @@ class VisitWikiPageTool(Tool):
62
  except Exception as e:
63
  return f"An unexpected error occurred: {str(e)}"
64
 
 
65
  class SpeechToTextTool(Tool):
66
  name = "transcriber"
67
  description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
@@ -107,8 +125,91 @@ class SpeechToTextTool(Tool):
107
  self.pipe = pipeline("automatic-speech-recognition", model=model)
108
 
109
  def forward(self, audio_file: str, sample_rate: int=16000) -> str:
110
- sample_rate = sample_rate if sample_rate is not None else 16000
111
- with open(audio_file, "rb") as f:
112
- decoder = torchcodec.decoders.AudioDecoder(f, sample_rate=sample_rate)
113
- out = self.pipe(decoder)
114
- return out["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Dict
2
  from transformers import pipeline
3
+ from markdown_it import MarkdownIt
4
  from smolagents.tools import Tool
5
  import torchcodec
6
 
7
 
8
+ class VisitWebpageTool(Tool):
9
+ name = "visit_webpage"
10
  description = (
11
+ "Visits a web page at the given url and reads its content as a markdown string and store it to a file"
12
  )
13
  inputs = {
14
  "url": {
15
  "type": "string",
16
  "description": "The url of the webpage to visit.",
17
  },
 
 
 
 
 
18
  }
19
  output_type = "string"
20
 
21
+ def __init__(
22
+ self,
23
+ file_name: str = "web_content.md",
24
+ user_agent: str = "agent-course"
25
+ ):
26
  super().__init__()
27
+ self.file_name = file_name
28
  self.headers = {"User-Agent": user_agent}
29
 
30
+ #def _truncate_content(self, content: str, max_length: int) -> str:
31
+ # if len(content) <= max_length:
32
+ # return content
33
+ # return (
34
+ # content[:max_length] + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
35
+ # )
36
 
37
def _inspect(self, doc: str) -> str:
    """Build an indented outline of the headings found in ``doc``.

    Args:
        doc: Markdown text to parse.

    Returns:
        One line per heading, indented by one space per heading level
        below the first (``h1`` -> 0 spaces, ``h2`` -> 1 space, ...).
        Returns an empty string when the document has no headings.
    """
    tokens = MarkdownIt().parse(doc)
    outline = []
    for idx, token in enumerate(tokens):
        if token.type != "heading_open":
            continue
        # The tag is 'h1'..'h6'; its last character is the heading level.
        depth = int(token.tag[-1]) - 1
        # The heading text lives in the token right after 'heading_open'.
        # Use the loop index instead of tokens.index(token), which rescans
        # the list for every heading, and never let the title be None.
        title = tokens[idx + 1].content if idx + 1 < len(tokens) else ""
        outline.append(" " * depth + title + "\n")
    return "".join(outline)
47
+
48
+ def forward(self, url: str) -> str:
49
  try:
50
  import re
51
  import requests
 
62
 
63
  # Convert the HTML content to Markdown
64
  markdown_content = markdownify(response.text).strip()
65
+
66
  # Remove multiple line breaks
67
  markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
68
+ with open(self.file_name, "w") as f:
69
+ f.write(markdown_content)
70
+ try:
71
+ content_summary = self._inspect(markdown_content)
72
+ return f"Web page content saved in '{self.file_name}'. The content has the following section tree:\n {content_summary}. To read the full website content you can call 'read_mddoc('web_content.md')'"
73
+ except Exception:
74
+ return f"Web page content saved in {self.file_name}."
75
  except requests.exceptions.Timeout:
76
  return "The request timed out. Please try again later or check the URL."
77
  except RequestException as e:
 
79
  except Exception as e:
80
  return f"An unexpected error occurred: {str(e)}"
81
 
82
+
83
  class SpeechToTextTool(Tool):
84
  name = "transcriber"
85
  description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
 
125
  self.pipe = pipeline("automatic-speech-recognition", model=model)
126
 
127
  def forward(self, audio_file: str, sample_rate: int=16000) -> str:
128
+ try:
129
+ sample_rate = sample_rate if sample_rate is not None else 16000
130
+ with open(audio_file, "rb") as f:
131
+ decoder = torchcodec.decoders.AudioDecoder(f, sample_rate=sample_rate)
132
+ audio_length = decoder.get_all_samples().data.shape[1]
133
+ out = self.pipe(decoder)
134
+ return out["text"]
135
+ except ValueError as e:
136
+ max_length = 300000
137
+ suggest_sample_rate = int(sample_rate * max_length/audio_length)
138
+ return f"The audio file to transcribe is too long, number of samples {audio_length}. You used a sample_rate of {sample_rate}, try using a smaller sample rate, like {suggest_sample_rate}"
139
+ except Exception as e:
140
+ raise e
141
+
142
+
143
class ReadMdDoc(Tool):
    """Tool that returns a whole markdown file or the content of one section.

    Sections are identified by their heading text. A section spans from its
    heading up to (not including) the next heading of the same or a higher
    level, or up to the end of the document.
    """

    name = "read_mddoc"
    description = (
        "Read an entire markdown file or a specific section of it."
    )
    inputs = {
        "file_name": {
            "type": "string",
            "description": "The file to read it should have 'md' extension.",
        },
        "section": {
            "type": "string",
            "nullable": True,
            "description": "If you want to read the entire file set this to 'all'. Otherwise you can look for a specific section title."
        },
        "max_length":{
            "type": "integer",
            "nullable": True,
            "description": "The maximum number of characters to return if the content has more characters it will be truncated. Use 40000 as a default."
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

    def _truncate_content(self, content: str, max_length: int) -> str:
        """Return ``content`` cut to ``max_length`` characters plus a notice.

        Content that already fits is returned unchanged.
        """
        if len(content) <= max_length:
            return content
        return (
            content[:max_length] + f"\n..._This content has been truncated to stay below {max_length} characters_...\n Does it have the information you need otherwise increase the max_length."
        )

    def get_token_map(self, tokens):
        """Map each heading title to the token span(s) of its section.

        Args:
            tokens: Token list produced by ``MarkdownIt().parse``.

        Returns:
            A dict ``{title: [start, end, ...]}`` in document order. ``start``
            is the index of the section's ``heading_open`` token and ``end``
            the exclusive index where the section stops. A title that occurs
            more than once contributes one ``start, end`` pair per occurrence.
        """
        token_map = {}
        # Sections whose end has not been found yet: (title, level, start).
        open_sections = []
        for i, token in enumerate(tokens):
            if token.type != "heading_open":
                continue
            # The heading text is carried by the token following 'heading_open'.
            title = tokens[i + 1].content if i + 1 < len(tokens) else ""
            # Register the key now so keys keep document order.
            token_map.setdefault(title, [])
            level = int(token.tag[-1])
            # A heading of the same or a higher level closes every deeper or
            # equal section that is still open.
            while open_sections and level <= open_sections[-1][1]:
                key, _, start = open_sections.pop()
                token_map[key].extend((start, i))
            open_sections.append((title, level, i))
        # Whatever is still open runs to the end of the document (exclusive
        # end index, so the last token is included).
        end = len(tokens)
        while open_sections:
            key, _, start = open_sections.pop()
            token_map[key].extend((start, end))
        return token_map

    def forward(
        self,
        file_name: str,
        section: str = "all",
        max_length: int = 40000):
        """Read ``file_name`` and return the requested section as text.

        Args:
            file_name: Path of the markdown file to read.
            section: Heading title to return, or ``'all'`` for the whole
                document. ``None`` is treated as ``'all'``.
            max_length: Maximum number of characters returned before the
                content is truncated. ``None`` is treated as 40000.

        Returns:
            The (possibly truncated) section content, or a human-readable
            message when the file is missing, cannot be parsed, or the
            section title is unknown.
        """
        # Both inputs are declared nullable, so None must fall back to defaults.
        section = "all" if section is None else section
        max_length = 40000 if max_length is None else max_length
        try:
            with open(file_name, "r") as f:
                doc = f.read()
        except FileNotFoundError:
            return f"Can't find {file_name}, are you sure the file exists and that you have spelled it crrectly?"
        try:
            mdit = MarkdownIt()
            tokens = mdit.parse(doc)
        except Exception:
            return "Error using the markdown parser, are you sure the file is in markdown format?"
        token_map = self.get_token_map(tokens)
        token_map["all"] = [0, len(tokens)]
        if section not in token_map:
            return f"The required Section is not found in the document. The available sections are:\n {list(token_map.keys())}. If you don't see what you are looking for here, you can try returning all the document using setting argument section to 'all'"
        bounds = token_map[section]
        # bounds is a flat list of start/end pairs; repeated titles yield
        # several pairs, whose contents are concatenated in document order.
        selected = []
        for start, end in zip(bounds[0::2], bounds[1::2]):
            selected.extend(tokens[start:end])
        content = "\n".join([t.content for t in selected])
        return self._truncate_content(content, max_length)