Blog

  • Wikipedia Tool

    from langchain_community.tools import WikipediaQueryRun
    from langchain_community.utilities import WikipediaAPIWrapper
    from langchain_core.pydantic_v1 import BaseModel, Field
    
    
    class WikiInputs(BaseModel):
        """Inputs to the wikipedia tool."""

        # Single string argument; the description text is attached to the field
        # through Field(description=...). This model is passed as ``args_schema``
        # to the WikipediaQueryRun tool built further down.
        query: str = Field(description="query to look up in Wikipedia, should be 3 or less words")
    
    
    # Wikipedia lookups need no API key, so the wrapper is created with its
    # defaults instead of an ``api_key`` argument.
    # NOTE(review): WikipediaAPIWrapper does not appear to declare an ``api_key``
    # field -- confirm against the installed langchain_community version.
    api_wrapper = WikipediaAPIWrapper()

    # Expose the wrapper as a tool whose single argument is described by WikiInputs.
    tool = WikipediaQueryRun(
        name="wiki-tool",
        description="look up things in wikipedia",
        args_schema=WikiInputs,
        api_wrapper=api_wrapper,
        return_direct=True,
    )

    # Run one query through the tool and print whatever it returns.
    print(tool.run("Who is Dwayne Johnson?"))
  • Exa Tools

    from exa_py import Exa
    from praisonai_tools import BaseTool
    
    class ExaSearch:
        """Tool wrapper that forwards a query to Exa's ``search_and_contents``."""

        name: str = "ExaSearch"
        description: str = "Perform a search using this ExaSearch tool and returns search results with url"

        def __init__(self, api_key: str):
            """Create the Exa client used by ``run`` from ``api_key``."""
            self.exa = Exa(api_key=api_key)

        def run(self, query: str):
            """Search for ``query`` and return the client's response unchanged.

            Text contents are requested with ``include_html_tags`` enabled and
            ``max_characters`` set to 1000.
            """
            text_options = {"include_html_tags": True, "max_characters": 1000}
            return self.exa.search_and_contents(query, text=text_options)
    
    
    class ExaSimilar:
        """Tool wrapper that forwards a URL to Exa's ``find_similar``."""

        name: str = "ExaSimilar"
        description: str = "Search for webpages similar to a given URL using ExaSimilar tool"

        def __init__(self, api_key: str):
            """Create the Exa client used by ``run`` from ``api_key``."""
            self.exa = Exa(api_key=api_key)

        def run(self, url: str):
            """Return the ``find_similar`` response for ``url``, asking for 3 results.

            The url passed in should be a URL returned from `search`.
            """
            return self.exa.find_similar(url, num_results=3)
    
    
    class ExaContents:
        """Tool wrapper that fetches page contents from Exa and returns trimmed text."""

        name: str = "ExaContents"
        description: str = "Get the contents of a webpage using a list of urls using ExaContents tool"

        def __init__(self, api_key: str):
            """Create the Exa client used by ``run`` from ``api_key``."""
            self.exa = Exa(api_key=api_key)

        def run(self, ids: list):
            """Fetch contents for ``ids`` and return them as one trimmed string.

            The ids must be passed in as a list, a list of ids returned from `search`.
            The response is converted with ``str()``, split on the literal
            ``"URL:"`` marker, each piece is cut to its first 1000 characters,
            and the pieces are joined with blank lines.
            """
            raw_text = str(self.exa.get_contents(ids))
            return "\n\n".join(piece[:1000] for piece in raw_text.split("URL:"))
    
    # Example usage
    if __name__ == "__main__":
        api_key = "Enter your exa key"

        # Run a search through the ExaSearch tool.
        tool = ExaSearch(api_key=api_key)
        search_query = "latest AI News"
        search_results = tool.run(search_query)
        print("Search Results:", search_results)

        # Find similar webpages. ExaSearch has no `_run_similar` method, so the
        # dedicated ExaSimilar tool is used instead.
        similar_tool = ExaSimilar(api_key=api_key)
        find_similar_url = "https://boomi.com/"  # Valid URL
        similar_results = similar_tool.run(find_similar_url)
        print("Similar Results:", similar_results)

        # Get contents using ids. ExaSearch has no `_run_get_contents` method,
        # so the dedicated ExaContents tool is used instead.
        contents_tool = ExaContents(api_key=api_key)
        content_ids = ["tesla.com"]  # Replace with actual IDs
        contents = contents_tool.run(content_ids)
        print("Contents:", contents)
    
  • CrewAI Apify Tool

    from crewai import Agent, Task, Crew
    import os
    from apify_client import ApifyClient
    from langchain.tools import tool
    from typing_extensions import Annotated
    
    # Module-level Apify client used by web_scraper_tool below; the token is a placeholder.
    client = ApifyClient(token="Enter your Apify key here")
    
    @tool("Web Scraper Tool")
    def web_scraper_tool(url: Annotated[str, "https://example.com/"]) -> Annotated[str, "Scraped content"]:
        """Web Scraper loads Start URLs in the browser and executes Page function on each page to extract data from it."""
        # NOTE(review): the docstring above is kept verbatim because the @tool
        # decorator presumably exposes it as the tool description -- confirm.
        #
        # Runs the "apify/web-scraper" Actor with `url` as the only start URL,
        # waits for the run to finish, and returns every dataset item rendered
        # with str(), one item per line.
        from urllib.parse import urlsplit

        # Follow links on the site being scraped instead of a hard-coded
        # example.com pattern; for "https://example.com/" this still yields
        # "https://example.com/*".
        parts = urlsplit(url)
        if parts.scheme and parts.netloc:
            site_glob = f"{parts.scheme}://{parts.netloc}/*"
        else:
            # No scheme/host could be parsed: fall back to a prefix glob of the URL itself.
            site_glob = url.rstrip("/") + "/*"

        run_input = {
            "runMode": "DEVELOPMENT",
            "startUrls": [{ "url": url }],
            "linkSelector": "a[href]",
            "globs": [{ "glob": site_glob }],
            "pseudoUrls": [],
            "excludes": [{ "glob": "/**/*.{png,jpg,jpeg,pdf}" }],
            "pageFunction": """// The function accepts a single argument: the "context" object.
            // see https://apify.com/apify/web-scraper#page-function
            async function pageFunction(context) {
                // This statement works as a breakpoint when you're trying to debug your code. Works only with Run mode: DEVELOPMENT!
                // debugger;
                const $ = context.jQuery;
                const pageTitle = $('title').first().text();
                const h1 = $('h1').first().text();
                const first_h2 = $('h2').first().text();
                const random_text_from_the_page = $('p').first().text();
                // Print some information to actor log
                context.log.info(`URL: ${context.request.url}, TITLE: ${pageTitle}`);
    
                // Manually add a new page to the queue for scraping.
                await context.enqueueRequest({ url: context.request.url });
    
                return {
                    url: context.request.url,
                    pageTitle,
                    h1,
                    first_h2,
                    random_text_from_the_page
                };
            }""",
            "proxyConfiguration": { "useApifyProxy": True },
            "initialCookies": [],
            "waitUntil": ["networkidle2"],
            "preNavigationHooks": """// We need to return array of (possibly async) functions here.
            // and "gotoOptions".
            [
                async (crawlingContext, gotoOptions) => {
                    // ...
                },
            ]
            """,
            "postNavigationHooks": """// We need to return array of (possibly async) functions here.
            // The functions accept a single argument: the "crawlingContext" object.
            [
                async (crawlingContext) => {
                    // ...
                },
            ]""",
            "breakpointLocation": "NONE",
        }
        # Run the Actor and wait for it to finish
        run = client.actor("apify/web-scraper").call(run_input=run_input)

        # Collect the Actor results from the run's dataset (if there are any):
        # one str()-rendered item per line, built with a single join.
        dataset = client.dataset(run["defaultDatasetId"])
        return "".join(f"{item}\n" for item in dataset.iterate_items())
    
    # Create the web scraper agent
    web_scraper_agent = Agent(
        role='Web Scraper',
        goal='Effectively scrape data from websites for your company',
        backstory='''You are an expert web scraper. Your job is to scrape all the data for
                    your company from a given website.
                    ''',
        # The tool goes in the ``tools`` list; the original passed it under the
        # misspelled keyword ``tool``.
        tools=[web_scraper_tool],
        verbose=True
    )

    # Define the web scraper task
    web_scraper_task = Task(
        description='Scrape all the URLs on the site so your company can use it for crawling and scraping.',
        expected_output='All the content of the website listed.',
        agent=web_scraper_agent,
        output_file='data.txt'
    )

    # Assemble the crew
    crew = Crew(
        agents=[web_scraper_agent],
        tasks=[web_scraper_task],
        verbose=2,
    )

    # Execute tasks
    result = crew.kickoff()
    print(result)

    # Save the result to a file. str() keeps the write working when kickoff()
    # returns something other than a plain string, and the explicit encoding
    # avoids platform-dependent failures on non-ASCII scraped text.
    with open('results.txt', 'w', encoding='utf-8') as f:
        f.write(str(result))
  • Exa Search Tool

    from exa_py import Exa
    from praisonai_tools import BaseTool
    
    class ExaSearchTool:
        """Tool wrapper that forwards a query to Exa's ``search_and_contents``."""

        name: str = "ExaSearchTool"
        description: str = "Perform a search using Exa and retrieve contents with specified options"

        def __init__(self, api_key: str):
            """Create the Exa client from ``api_key``.

            The caller's key is now actually used; previously the parameter was
            ignored in favour of a hard-coded placeholder string.
            """
            self.exa = Exa(api_key=api_key)

        def _run(self, query: str):
            """Search for ``query`` and return the client's response unchanged.

            Text contents are requested with ``include_html_tags`` enabled and
            ``max_characters`` set to 1000.
            """
            results = self.exa.search_and_contents(
                query,
                text={"include_html_tags": True, "max_characters": 1000},
            )
            return results
    # Example usage
    if __name__ == "__main__":
        # Placeholder for the real Exa key; the original used the class name
        # here, which reads like a copy-paste slip.
        api_key = "Enter your key here"
        tool = ExaSearchTool(api_key=api_key)
        search_query = "recent midjourney news"
        results = tool._run(search_query)
        print(results)
  • AutoGen Scraping

    ! pip install -qqq pyautogen apify-client
    
    import os
    import openai
    
    # Single-entry model configuration list; it is passed as
    # llm_config["config_list"] to the scraper agent further down.
    config_list = [dict(model="gpt-3.5-turbo", api_key="Enter your api key")]
    
    from apify_client import ApifyClient
    from typing_extensions import Annotated
    
    
    def scrape_page(url: Annotated[str, "https://example.com/"]) -> Annotated[str, "Scraped content"]:
        """Crawl ``url`` with an Apify Actor and return the collected page text.

        A new ApifyClient is created on every call, the Actor with id
        "aYG0l9s7dbB7j3gbS" is run with ``url`` as its only start URL, and the
        "text" field of each dataset item is concatenated, one item per line.
        The result is cut to the first int(0.75 * 20000) characters.
        """
        # Client for this call; the token is a placeholder to be replaced.
        apify = ApifyClient(token="Enter your apify key")

        # Input handed to the Actor.
        actor_input = {
            "startUrls": [{"url": url}],
            "useSitemaps": False,
            "crawlerType": "playwright:firefox",
            "includeUrlGlobs": [],
            "excludeUrlGlobs": [],
            "ignoreCanonicalUrl": False,
            "maxCrawlDepth": 0,
            "maxCrawlPages": 4,
            "initialConcurrency": 0,
            "maxConcurrency": 200,
            "initialCookies": [],
            "proxyConfiguration": {"useApifyProxy": True},
            "maxSessionRotations": 10,
            "maxRequestRetries": 5,
            "requestTimeoutSecs": 60,
            "dynamicContentWaitSecs": 10,
            "maxScrollHeightPixels": 5000,
            "removeElementsCssSelector": """nav, footer, script, style, noscript, svg,
        [role=\"alert\"],
        [role=\"banner\"],
        [role=\"dialog\"],
        [role=\"alertdialog\"],
        [role=\"region\"][aria-label*=\"skip\" i],
        [aria-modal=\"true\"]""",
            "removeCookieWarnings": True,
            "clickElementsCssSelector": '[aria-expanded="false"]',
            "htmlTransformer": "readableText",
            "readableTextCharThreshold": 100,
            "aggressivePrune": False,
            "debugMode": True,
            "debugLog": True,
            "saveHtml": True,
            "saveMarkdown": True,
            "saveFiles": False,
            "saveScreenshots": False,
            "maxResults": 9999999,
            "clientSideMinChangePercentage": 15,
            "renderingTypeDetectionPercentage": 10,
        }

        # Start the Actor and block until the run completes.
        actor_run = apify.actor("aYG0l9s7dbB7j3gbS").call(run_input=actor_input)

        # One line per dataset item; items without a "text" key contribute an empty line.
        dataset_items = apify.dataset(actor_run["defaultDatasetId"]).iterate_items()
        collected = "".join(entry.get("text", "") + "\n" for entry in dataset_items)

        # Size cap carried over from the original: 0.75 * 20000 characters
        # (the original comment calls this "slightly less than max to be safe 32k").
        average_token = 0.75
        max_tokens = 20000
        char_limit = int(average_token * max_tokens)
        return collected[:char_limit]
    
    from autogen import ConversableAgent, register_function
    
    # Create web scrapper agent.
    def _is_termination_msg(message):
        """Return True when the message's text content contains "terminate" (any case).

        Messages with a missing, None, or non-string "content" are treated as
        non-terminating; the original lambda raised KeyError when the key was
        absent because it indexed ``x["content"]`` after a ``.get`` check.
        """
        content = message.get("content")
        return isinstance(content, str) and "terminate" in content.lower()


    # LLM-backed agent that decides when to call the scraping function.
    scraper_agent = ConversableAgent(
        "WebScraper",
        llm_config={"config_list": config_list},
        system_message="You are a web scrapper and you can scrape any web page using the tools provided. "
        "Returns 'TERMINATE' when the scraping is done.",
    )

    # Create user proxy agent.
    user_proxy_agent = ConversableAgent(
        "UserProxy",
        llm_config=False,  # No LLM for this agent.
        human_input_mode="NEVER",
        code_execution_config=False,  # No code execution for this agent.
        is_termination_msg=_is_termination_msg,
        default_auto_reply="Please continue if not finished, otherwise return 'TERMINATE'.",
    )

    # Register the function with the agents: the scraper agent is the caller
    # and the user proxy is the executor.
    register_function(
        scrape_page,
        caller=scraper_agent,
        executor=user_proxy_agent,
        name="scrape_page",
        description="Scrape a web page and return the content.",
    )

    # Start the conversation and request an LLM-written summary in a fixed format.
    chat_result = user_proxy_agent.initiate_chat(
        scraper_agent,
        message="Can you scrape https://example.com/ for me?",
        summary_method="reflection_with_llm",
        summary_args={
            "summary_prompt": """Summarize the scraped content and format summary EXACTLY as follows:
    ---
    *Website*:
    `https://example.com/`
    ---
    *content*:
    `[CONTENT GOES HERE]`
    ---
    """
        },
    )

    print(chat_result)