"""Scrape a web page with the Apify website-content-crawler actor and have an
AutoGen two-agent chat (tool-calling scraper agent + user-proxy executor)
summarize the scraped content.

Install dependencies first:
    pip install -qqq pyautogen apify-client
"""

import os
from typing import Annotated

import openai  # noqa: F401  # pulled in for the OpenAI-backed LLM config below
from apify_client import ApifyClient
from autogen import ConversableAgent, register_function

# LLM configuration for the scraper agent.  Read the key from the environment
# rather than hard-coding a secret in source; the literal placeholder is kept
# only as a fallback hint.
config_list = [
    {
        "model": "gpt-3.5-turbo",
        "api_key": os.environ.get("OPENAI_API_KEY", "Enter your api key"),
    },
]


def scrape_page(
    url: Annotated[str, "https://example.com/"],
) -> Annotated[str, "Scraped content"]:
    """Crawl *url* with Apify's website-content-crawler and return its text.

    Runs the hosted actor synchronously, concatenates the ``text`` field of
    every dataset item, and truncates the result to stay within the model's
    context window.

    Raises whatever ``apify_client`` raises on network/auth failure.
    """
    # Initialize the ApifyClient with your API token (env var preferred).
    client = ApifyClient(token=os.environ.get("APIFY_API_TOKEN", "Enter your apify key"))

    # Prepare the Actor input.
    run_input = {
        "startUrls": [{"url": url}],
        "useSitemaps": False,
        "crawlerType": "playwright:firefox",
        "includeUrlGlobs": [],
        "excludeUrlGlobs": [],
        "ignoreCanonicalUrl": False,
        "maxCrawlDepth": 0,
        "maxCrawlPages": 4,
        "initialConcurrency": 0,
        "maxConcurrency": 200,
        "initialCookies": [],
        "proxyConfiguration": {"useApifyProxy": True},
        "maxSessionRotations": 10,
        "maxRequestRetries": 5,
        "requestTimeoutSecs": 60,
        "dynamicContentWaitSecs": 10,
        "maxScrollHeightPixels": 5000,
        # Strip navigation chrome, scripts, and cookie/consent overlays so the
        # readable-text transformer sees only the main content.
        "removeElementsCssSelector": """nav, footer, script, style, noscript, svg,
[role=\"alert\"],
[role=\"banner\"],
[role=\"dialog\"],
[role=\"alertdialog\"],
[role=\"region\"][aria-label*=\"skip\" i],
[aria-modal=\"true\"]""",
        "removeCookieWarnings": True,
        "clickElementsCssSelector": '[aria-expanded="false"]',
        "htmlTransformer": "readableText",
        "readableTextCharThreshold": 100,
        "aggressivePrune": False,
        "debugMode": True,
        "debugLog": True,
        "saveHtml": True,
        "saveMarkdown": True,
        "saveFiles": False,
        "saveScreenshots": False,
        "maxResults": 9999999,
        "clientSideMinChangePercentage": 15,
        "renderingTypeDetectionPercentage": 10,
    }

    # Run the Actor and wait for it to finish.
    run = client.actor("aYG0l9s7dbB7j3gbS").call(run_input=run_input)

    # Collect the text of every dataset item; join once instead of quadratic
    # string concatenation in the loop.
    parts = [
        item.get("text", "")
        for item in client.dataset(run["defaultDatasetId"]).iterate_items()
    ]
    text_data = "".join(part + "\n" for part in parts)

    # Rough chars-per-token estimate; truncate so the content fits comfortably
    # in the model's context window.
    average_token = 0.75
    max_tokens = 20000  # slightly less than max to be safe 32k
    return text_data[: int(average_token * max_tokens)]


def main() -> None:
    """Wire up the two agents, register the scraping tool, and run the chat."""
    # Create web scraper agent: decides when to call the tool and summarizes.
    scraper_agent = ConversableAgent(
        "WebScraper",
        llm_config={"config_list": config_list},
        system_message="You are a web scrapper and you can scrape any web page using the tools provided. "
        "Returns 'TERMINATE' when the scraping is done.",
    )

    # Create user proxy agent: executes the tool call, no LLM of its own.
    user_proxy_agent = ConversableAgent(
        "UserProxy",
        llm_config=False,  # No LLM for this agent.
        human_input_mode="NEVER",
        code_execution_config=False,  # No code execution for this agent.
        # Stop once the scraper signals completion.
        is_termination_msg=lambda x: x.get("content", "") is not None
        and "terminate" in x["content"].lower(),
        default_auto_reply="Please continue if not finished, otherwise return 'TERMINATE'.",
    )

    # Register the function with the agents: scraper proposes, proxy executes.
    register_function(
        scrape_page,
        caller=scraper_agent,
        executor=user_proxy_agent,
        name="scrape_page",
        description="Scrape a web page and return the content.",
    )

    chat_result = user_proxy_agent.initiate_chat(
        scraper_agent,
        message="Can you scrape https://example.com/ for me?",
        summary_method="reflection_with_llm",
        summary_args={
            "summary_prompt": """Summarize the scraped content and format summary EXACTLY as follows:
---
*Website*:
`https://example.com/`
---
*content*:
`[CONTENT GOES HERE]`
---
"""
        },
    )

    print(chat_result)


if __name__ == "__main__":
    main()