docs/en/tools/web-scraping/oxylabsscraperstool.mdx
Get the credentials by creating an Oxylabs Account here.
pip install 'crewai[tools]' oxylabs
Check Oxylabs Documentation to get more information about API parameters.
### OxylabsAmazonProductScraperTool

from crewai_tools import OxylabsAmazonProductScraperTool
# OXYLABS_USERNAME and OXYLABS_PASSWORD must be set before the tool is created.
tool = OxylabsAmazonProductScraperTool()

# `query` is the product's 10-symbol ASIN.
result = tool.run(query="B07FZ8S74R")
print(result)
- `query` - 10-symbol ASIN code.
- `domain` - domain localization for Amazon.
- `geo_location` - the "Deliver to" location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

from crewai_tools import OxylabsAmazonProductScraperTool
# OXYLABS_USERNAME and OXYLABS_PASSWORD must be set before the tool is created.
tool = OxylabsAmazonProductScraperTool(
    config={
        "domain": "com",
        "parse": True,
        # `context` carries extra key/value settings for the request.
        "context": [
            {
                "key": "autoselect_variant",
                "value": True,
            },
        ],
    },
)

# `query` is the product's 10-symbol ASIN.
result = tool.run(query="B07FZ8S74R")
print(result)
### OxylabsAmazonSearchScraperTool

from crewai_tools import OxylabsAmazonSearchScraperTool
# Requires OXYLABS_USERNAME and OXYLABS_PASSWORD to be set beforehand.
search_tool = OxylabsAmazonSearchScraperTool()

# Run an Amazon search for the given term and show whatever comes back.
search_output = search_tool.run(query="headsets")
print(search_output)
- `query` - Amazon search term.
- `domain` - domain localization for Amazon.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `geo_location` - the "Deliver to" location.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

from crewai_tools import OxylabsAmazonSearchScraperTool
# Requires OXYLABS_USERNAME and OXYLABS_PASSWORD to be set beforehand.
search_config = {
    "domain": "nl",
    # Start at page 2 and retrieve 2 pages.
    "start_page": 2,
    "pages": 2,
    "parse": True,
    "context": [
        {"key": "category_id", "value": 16391693031},
    ],
}
search_tool = OxylabsAmazonSearchScraperTool(config=search_config)

search_output = search_tool.run(query="nirvana tshirt")
print(search_output)
### OxylabsGoogleSearchScraperTool

from crewai_tools import OxylabsGoogleSearchScraperTool
# Requires OXYLABS_USERNAME and OXYLABS_PASSWORD to be set beforehand.
google_tool = OxylabsGoogleSearchScraperTool()

# Run a Google search for the keyword and show whatever comes back.
google_output = google_tool.run(query="iPhone 16")
print(google_output)
- `query` - search keyword.
- `domain` - domain localization for Google.
- `start_page` - starting page number.
- `pages` - number of pages to retrieve.
- `limit` - number of results to retrieve on each page.
- `locale` - `Accept-Language` header value, which changes the interface language of the Google search page.
- `geo_location` - the geographical location that the result should be adapted for. Using this parameter correctly is extremely important to get the right data.
- `user_agent_type` - device type and browser.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

from crewai_tools import OxylabsGoogleSearchScraperTool
# Requires OXYLABS_USERNAME and OXYLABS_PASSWORD to be set beforehand.
google_config = {
    "parse": True,
    "geo_location": "Paris, France",
    "user_agent_type": "tablet",
}
google_tool = OxylabsGoogleSearchScraperTool(config=google_config)

google_output = google_tool.run(query="iPhone 16")
print(google_output)
### OxylabsUniversalScraperTool

from crewai_tools import OxylabsUniversalScraperTool
# Requires OXYLABS_USERNAME and OXYLABS_PASSWORD to be set beforehand.
page_tool = OxylabsUniversalScraperTool()

# Scrape the given URL and show whatever comes back.
page_output = page_tool.run(url="https://ip.oxylabs.io")
print(page_output)
- `url` - website URL to scrape.
- `user_agent_type` - device type and browser.
- `geo_location` - sets the proxy's geolocation to retrieve data.
- `render` - enables JavaScript rendering when set to `html`.
- `callback_url` - URL to your callback endpoint.
- `context` - additional advanced settings and controls for specialized requirements.
- `parse` - returns parsed data when set to true, as long as a dedicated parser exists for the submitted URL's page type.
- `parsing_instructions` - define your own parsing and data transformation logic that will be executed on an HTML scraping result.

from crewai_tools import OxylabsUniversalScraperTool
# OXYLABS_USERNAME and OXYLABS_PASSWORD must be set before the tool is created.
tool = OxylabsUniversalScraperTool(
    config={
        "render": "html",
        "user_agent_type": "mobile",
        # `context` carries extra key/value settings for the request.
        "context": [
            {"key": "force_headers", "value": True},
            {"key": "force_cookies", "value": True},
            {
                "key": "headers",
                "value": {
                    "Custom-Header-Name": "custom header content",
                },
            },
            {
                "key": "cookies",
                "value": [
                    {"key": "NID", "value": "1234567890"},
                    # Cookie names cannot contain spaces; Google's cookie is
                    # spelled `1P_JAR`.
                    {"key": "1P_JAR", "value": "0987654321"},
                ],
            },
            {"key": "http_method", "value": "get"},
            {"key": "follow_redirects", "value": True},
            # NOTE(review): 808 and 909 are not standard HTTP status codes;
            # presumably illustrative placeholders. Confirm before copying.
            {"key": "successful_status_codes", "value": [808, 909]},
        ],
    },
)

result = tool.run(url="https://ip.oxylabs.io")
print(result)