def __init__(
    self,
    *,
    llm_config: Optional[Union[LLMConfig, dict[str, Any]]] = None,
    firecrawl_api_key: Optional[str] = None,
    firecrawl_api_url: Optional[str] = None,
) -> None:
    """
    Initializes the FirecrawlTool.

    Args:
        llm_config (Optional[Union[LLMConfig, dict[str, Any]]]): LLM configuration. (Currently unused but kept for potential future integration).
        firecrawl_api_key (Optional[str]): The API key for the Firecrawl API. If not provided,
            it attempts to read from the `FIRECRAWL_API_KEY` environment variable.
        firecrawl_api_url (Optional[str]): The base URL for the Firecrawl API. If not provided,
            it attempts to read from the `FIRECRAWL_API_URL` environment variable, or defaults
            to the public Firecrawl API. Use this parameter to connect to self-hosted Firecrawl instances.

    Raises:
        ValueError: If `firecrawl_api_key` is not provided either directly or via the environment variable.
    """
    # Explicit constructor arguments take precedence over environment variables.
    self.firecrawl_api_key = firecrawl_api_key or os.getenv("FIRECRAWL_API_KEY")
    # A None URL is acceptable: downstream helpers receive None and presumably
    # fall back to the public Firecrawl endpoint — confirm in _firecrawl_* helpers.
    self.firecrawl_api_url = firecrawl_api_url or os.getenv("FIRECRAWL_API_URL")
    if self.firecrawl_api_key is None:
        raise ValueError(
            "firecrawl_api_key must be provided either as an argument or via FIRECRAWL_API_KEY env var"
        )

    # NOTE(review): In every nested tool function below, the API key/URL are
    # supplied through dependency-injection markers (`Depends(on(...))`) bound
    # to the values resolved above, rather than as ordinary caller-supplied
    # arguments — presumably so credentials never appear in the LLM-visible
    # tool signature; confirm against the framework's `Depends`/`on` semantics.
    # Each function still re-checks the key at call time as a defensive guard.

    def firecrawl_scrape(
        url: Annotated[str, "The URL to scrape."],
        firecrawl_api_key: Annotated[Optional[str], Depends(on(self.firecrawl_api_key))],
        firecrawl_api_url: Annotated[Optional[str], Depends(on(self.firecrawl_api_url))],
        formats: Annotated[Optional[list[str]], "Output formats (e.g., ['markdown', 'html'])"] = None,
        include_tags: Annotated[Optional[list[str]], "HTML tags to include"] = None,
        exclude_tags: Annotated[Optional[list[str]], "HTML tags to exclude"] = None,
        headers: Annotated[Optional[dict[str, str]], "HTTP headers to use"] = None,
        wait_for: Annotated[Optional[int], "Time to wait for page load in milliseconds"] = None,
        timeout: Annotated[Optional[int], "Request timeout in milliseconds"] = None,
    ) -> list[dict[str, Any]]:
        """
        Scrapes a single URL and returns the content.

        Args:
            url: The URL to scrape.
            firecrawl_api_key: The API key for Firecrawl (injected dependency).
            firecrawl_api_url: The base URL for the Firecrawl API (injected dependency).
            formats: Output formats (e.g., ['markdown', 'html']). Defaults to ['markdown'].
            include_tags: HTML tags to include. Defaults to None.
            exclude_tags: HTML tags to exclude. Defaults to None.
            headers: HTTP headers to use. Defaults to None.
            wait_for: Time to wait for page load in milliseconds. Defaults to None.
            timeout: Request timeout in milliseconds. Defaults to None.

        Returns:
            A list containing the scraped content with title, url, content, and metadata.

        Raises:
            ValueError: If the Firecrawl API key is not available.
        """
        if firecrawl_api_key is None:
            raise ValueError("Firecrawl API key is missing.")
        # Thin pass-through to the module-level implementation; all parameters
        # are forwarded unchanged (formats=None is resolved downstream).
        return _firecrawl_scrape(
            url=url,
            firecrawl_api_key=firecrawl_api_key,
            firecrawl_api_url=firecrawl_api_url,
            formats=formats,
            include_tags=include_tags,
            exclude_tags=exclude_tags,
            headers=headers,
            wait_for=wait_for,
            timeout=timeout,
        )

    def firecrawl_crawl(
        url: Annotated[str, "The starting URL to crawl."],
        firecrawl_api_key: Annotated[Optional[str], Depends(on(self.firecrawl_api_key))],
        firecrawl_api_url: Annotated[Optional[str], Depends(on(self.firecrawl_api_url))],
        limit: Annotated[int, "Maximum number of pages to crawl"] = 5,
        formats: Annotated[Optional[list[str]], "Output formats (e.g., ['markdown', 'html'])"] = None,
        include_paths: Annotated[Optional[list[str]], "URL patterns to include"] = None,
        exclude_paths: Annotated[Optional[list[str]], "URL patterns to exclude"] = None,
        max_depth: Annotated[Optional[int], "Maximum crawl depth"] = None,
        allow_backward_crawling: Annotated[Optional[bool], "Allow crawling backward links"] = False,
        allow_external_content_links: Annotated[Optional[bool], "Allow external links"] = False,
    ) -> list[dict[str, Any]]:
        """
        Crawls a website starting from a URL and returns the content from multiple pages.

        Args:
            url: The starting URL to crawl.
            firecrawl_api_key: The API key for Firecrawl (injected dependency).
            firecrawl_api_url: The base URL for the Firecrawl API (injected dependency).
            limit: Maximum number of pages to crawl. Defaults to 5.
            formats: Output formats (e.g., ['markdown', 'html']). Defaults to ['markdown'].
            include_paths: URL patterns to include. Defaults to None.
            exclude_paths: URL patterns to exclude. Defaults to None.
            max_depth: Maximum crawl depth. Defaults to None.
            allow_backward_crawling: Allow crawling backward links. Defaults to False.
            allow_external_content_links: Allow external links. Defaults to False.

        Returns:
            A list of crawled pages with title, url, content, and metadata for each page.

        Raises:
            ValueError: If the Firecrawl API key is not available.
        """
        if firecrawl_api_key is None:
            raise ValueError("Firecrawl API key is missing.")
        return _firecrawl_crawl(
            url=url,
            firecrawl_api_key=firecrawl_api_key,
            firecrawl_api_url=firecrawl_api_url,
            limit=limit,
            formats=formats,
            include_paths=include_paths,
            exclude_paths=exclude_paths,
            max_depth=max_depth,
            # `or False` coerces a caller-supplied None back to a strict bool
            # so the helper never receives None for these flags.
            allow_backward_crawling=allow_backward_crawling or False,
            allow_external_content_links=allow_external_content_links or False,
        )

    def firecrawl_map(
        url: Annotated[str, "The website URL to map."],
        firecrawl_api_key: Annotated[Optional[str], Depends(on(self.firecrawl_api_key))],
        firecrawl_api_url: Annotated[Optional[str], Depends(on(self.firecrawl_api_url))],
        search: Annotated[Optional[str], "Search term to filter URLs"] = None,
        ignore_sitemap: Annotated[Optional[bool], "Whether to ignore the sitemap"] = False,
        include_subdomains: Annotated[Optional[bool], "Whether to include subdomains"] = False,
        limit: Annotated[int, "Maximum number of URLs to return"] = 5000,
    ) -> list[dict[str, Any]]:
        """
        Maps a website to discover URLs.

        Args:
            url: The website URL to map.
            firecrawl_api_key: The API key for Firecrawl (injected dependency).
            firecrawl_api_url: The base URL for the Firecrawl API (injected dependency).
            search: Search term to filter URLs. Defaults to None.
            ignore_sitemap: Whether to ignore the sitemap. Defaults to False.
            include_subdomains: Whether to include subdomains. Defaults to False.
            limit: Maximum number of URLs to return. Defaults to 5000.

        Returns:
            A list of URLs found on the website.

        Raises:
            ValueError: If the Firecrawl API key is not available.
        """
        if firecrawl_api_key is None:
            raise ValueError("Firecrawl API key is missing.")
        return _firecrawl_map(
            url=url,
            firecrawl_api_key=firecrawl_api_key,
            firecrawl_api_url=firecrawl_api_url,
            search=search,
            # Coerce Optional[bool] (possibly None) to a strict bool for the helper.
            ignore_sitemap=ignore_sitemap or False,
            include_subdomains=include_subdomains or False,
            limit=limit,
        )

    def firecrawl_search(
        query: Annotated[str, "The search query string."],
        firecrawl_api_key: Annotated[Optional[str], Depends(on(self.firecrawl_api_key))],
        firecrawl_api_url: Annotated[Optional[str], Depends(on(self.firecrawl_api_url))],
        limit: Annotated[int, "Maximum number of results to return"] = 5,
        tbs: Annotated[Optional[str], "Time filter (e.g., 'qdr:d' for past day)"] = None,
        # `filter` intentionally shadows the builtin: it mirrors the Firecrawl
        # API parameter name so the LLM-visible schema matches the service.
        filter: Annotated[Optional[str], "Custom result filter"] = None,
        lang: Annotated[Optional[str], "Language code"] = "en",
        country: Annotated[Optional[str], "Country code"] = "us",
        location: Annotated[Optional[str], "Geo-targeting location"] = None,
        timeout: Annotated[Optional[int], "Request timeout in milliseconds"] = None,
    ) -> list[dict[str, Any]]:
        """
        Executes a search operation using the Firecrawl API.

        Args:
            query: The search query string.
            firecrawl_api_key: The API key for Firecrawl (injected dependency).
            firecrawl_api_url: The base URL for the Firecrawl API (injected dependency).
            limit: Maximum number of results to return. Defaults to 5.
            tbs: Time filter (e.g., "qdr:d" for past day). Defaults to None.
            filter: Custom result filter. Defaults to None.
            lang: Language code. Defaults to "en".
            country: Country code. Defaults to "us".
            location: Geo-targeting location. Defaults to None.
            timeout: Request timeout in milliseconds. Defaults to None.

        Returns:
            A list of search results with title, url, content, and metadata.

        Raises:
            ValueError: If the Firecrawl API key is not available.
        """
        if firecrawl_api_key is None:
            raise ValueError("Firecrawl API key is missing.")
        return _firecrawl_search(
            query=query,
            firecrawl_api_key=firecrawl_api_key,
            firecrawl_api_url=firecrawl_api_url,
            limit=limit,
            tbs=tbs,
            filter=filter,
            # Re-apply the documented defaults if a caller explicitly passed None.
            lang=lang or "en",
            country=country or "us",
            location=location,
            timeout=timeout,
        )

    def firecrawl_deep_research(
        query: Annotated[str, "The research query or topic to investigate."],
        firecrawl_api_key: Annotated[Optional[str], Depends(on(self.firecrawl_api_key))],
        firecrawl_api_url: Annotated[Optional[str], Depends(on(self.firecrawl_api_url))],
        max_depth: Annotated[int, "Maximum depth of research exploration"] = 7,
        time_limit: Annotated[int, "Time limit in seconds for research"] = 270,
        max_urls: Annotated[int, "Maximum number of URLs to process"] = 20,
        analysis_prompt: Annotated[Optional[str], "Custom prompt for analysis"] = None,
        system_prompt: Annotated[Optional[str], "Custom system prompt"] = None,
    ) -> dict[str, Any]:
        """
        Executes a deep research operation using the Firecrawl API.

        Args:
            query: The research query or topic to investigate.
            firecrawl_api_key: The API key for Firecrawl (injected dependency).
            firecrawl_api_url: The base URL for the Firecrawl API (injected dependency).
            max_depth: Maximum depth of research exploration. Defaults to 7.
            time_limit: Time limit in seconds for research. Defaults to 270.
            max_urls: Maximum number of URLs to process. Defaults to 20.
            analysis_prompt: Custom prompt for analysis. Defaults to None.
            system_prompt: Custom system prompt. Defaults to None.

        Returns:
            The deep research result from Firecrawl.

        Raises:
            ValueError: If the Firecrawl API key is not available.
        """
        if firecrawl_api_key is None:
            raise ValueError("Firecrawl API key is missing.")
        # Note: returns a single dict, unlike the other tools which return lists.
        return _execute_firecrawl_deep_research(
            query=query,
            firecrawl_api_key=firecrawl_api_key,
            firecrawl_api_url=firecrawl_api_url,
            max_depth=max_depth,
            time_limit=time_limit,
            max_urls=max_urls,
            analysis_prompt=analysis_prompt,
            system_prompt=system_prompt,
        )

    # Default to scrape functionality for the main tool: registering with the
    # base Tool class makes `firecrawl_scrape` the callable the framework
    # exposes under this tool's name/description.
    super().__init__(
        name="firecrawl_scrape",
        description="Use the Firecrawl API to scrape content from a single URL.",
        func_or_tool=firecrawl_scrape,
    )

    # Store additional methods for manual access — callers can invoke
    # crawl/map/search/deep_research directly on the instance even though
    # only scrape is registered as the primary tool function.
    self.scrape = firecrawl_scrape
    self.crawl = firecrawl_crawl
    self.map = firecrawl_map
    self.search = firecrawl_search
    self.deep_research = firecrawl_deep_research