GetScraping API Examples - Python
First, let's initialize the GetScraping client:
from getscraping import GetScrapingClient
client = GetScrapingClient('your_api_key_here')
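In a real project you will likely want to load the key from the environment rather than hard-coding it. A minimal sketch, assuming the key is stored in a GETSCRAPING_API_KEY environment variable:

import os

client = GetScrapingClient(os.environ['GETSCRAPING_API_KEY'])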
1. Basic GET request with retry configuration
This example demonstrates a simple GET request with retry logic and a custom timeout.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, RetryConfig
def basic_get_request(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        retry_config=RetryConfig(
            num_retries=3,
            success_status_codes=[200, 201]
        ),
        timeout_millis=30000
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.body.get_text()
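A hypothetical invocation (the URL is a placeholder):

text = basic_get_request('https://example.com')
print(text[:200])  # first 200 characters of the page text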
2. POST request with custom headers and body
This example shows how to make a POST request with custom headers and a JSON body.
import json

def post_request_with_headers(url: str, data: dict) -> dict:
    options = GetScrapingParams(
        url=url,
        method='POST',
        headers={
            'Content-Type': 'application/json',
            'Custom-Header': 'CustomValue'
        },
        body=json.dumps(data),
        omit_default_headers=True
    )
    response = client.scrape(options)
    return response.json()
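For example, posting a small JSON payload to an echo endpoint (httpbin.org is used here purely for illustration):

result = post_request_with_headers('https://httpbin.org/post', {'query': 'laptops', 'page': 1})
print(result)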
3. Using JavaScript rendering with wait conditions
This example demonstrates how to use JavaScript rendering with a wait condition for a specific selector.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, JavascriptRenderingOptions
def render_js_with_wait(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            wait_for_selector='#dynamic-content',
            wait_millis=5000
        )
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    dynamic_content = soup.select_one('#dynamic-content')
    return dynamic_content.decode_contents() if dynamic_content else ''
4. Intercepting a specific request
This example shows how to intercept a specific API request during page load.
from getscraping import GetScrapingParams, JavascriptRenderingOptions, InterceptRequestParams
def intercept_request(url: str) -> dict:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            intercept_request=InterceptRequestParams(
                # Raw string avoids an invalid escape sequence in the regex.
                intercepted_url_regex=r'.*api/data.*',
                intercepted_url_method='POST',
                return_json=True
            )
        )
    )
    response = client.scrape(options)
    return response.json()
5. Using programmable browser actions
This example demonstrates how to use programmable browser actions to interact with the page.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, JavascriptRenderingOptions, ProgrammableBrowserOptions, ProgrammableBrowserAction
def programmable_browser_actions(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            programmable_browser=ProgrammableBrowserOptions(
                actions=[
                    ProgrammableBrowserAction(type='click', selector='#load-more-button', wait_millis=2000),
                    ProgrammableBrowserAction(type='scroll', selector='body', wait_millis=1000),
                    ProgrammableBrowserAction(type='execute_js', javascript='window.scrollTo(0, document.body.scrollHeight);', wait_millis=2000)
                ]
            )
        )
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Guard against the selector matching nothing rather than raising AttributeError.
    content = soup.select_one('.content')
    return content.get_text() if content else ''
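If a page needs several rounds of "load more", the same click action can simply be repeated. A minimal sketch, assuming the same #load-more-button selector as above:

def click_load_more(url: str, clicks: int = 3) -> str:
    # One click action per round; each waits 2 seconds for new content to render.
    actions = [
        ProgrammableBrowserAction(type='click', selector='#load-more-button', wait_millis=2000)
        for _ in range(clicks)
    ]
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            programmable_browser=ProgrammableBrowserOptions(actions=actions)
        )
    )
    return client.scrape(options).text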
6. Using different proxy types
This example shows how to use different proxy types for your requests.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams
def proxy_request(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        use_residential_proxy=True
        # Alternatively:
        # use_isp_proxy=True,
        # use_mobile_proxy=True,
        # use_own_proxy='http://username:password@proxy.example.com:8080',
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.body.decode_contents()
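Residential and mobile pools typically cost more per request than the default pool, so a common pattern is to escalate only when a cheaper attempt fails. A sketch of that idea, assuming client.scrape raises on failure:

def scrape_with_proxy_fallback(url: str) -> str:
    # Try the default proxy pool first, then escalate to residential (hypothetical pattern).
    try:
        return client.scrape(GetScrapingParams(url=url, method='GET')).text
    except Exception:
        return client.scrape(GetScrapingParams(url=url, method='GET', use_residential_proxy=True)).text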
7. Handling cookies
This example demonstrates how to handle cookies in your requests and responses.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams
from typing import Dict, List, Union

def cookie_handling(url: str, initial_cookies: List[str]) -> Dict[str, Union[str, List[str]]]:
    options = GetScrapingParams(
        url=url,
        method='GET',
        cookies=initial_cookies
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    # set-cookie comes back as a single header string; the default [] covers the no-cookie case.
    new_cookies = response.headers.get('set-cookie', [])
    return {'content': soup.body.get_text(), 'cookies': new_cookies}
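To keep a session alive across requests, any cookies the server set can be fed back into the next call. A hypothetical two-step flow (the URLs and cookie value are placeholders):

first = cookie_handling('https://example.com', ['session_id=abc123'])
cookies = first['cookies']
# Normalize to a list of "name=value" strings before reuse.
next_page = client.scrape(GetScrapingParams(
    url='https://example.com/page/2',
    method='GET',
    cookies=[cookies] if isinstance(cookies, str) else cookies
))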
8. Using custom success criteria
This example shows how to use custom success criteria for your requests.
from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, RetryConfig
def custom_success_criteria(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        retry_config=RetryConfig(
            num_retries=3,
            success_status_codes=[200],
            success_selector='#content-loaded'
        )
    )
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    # success_selector should guarantee this element exists, but guard anyway.
    content = soup.select_one('#content-loaded')
    return content.get_text() if content else ''
9. Combining multiple features
This example demonstrates how to combine multiple features of the GetScraping API in a single request.
from getscraping import GetScrapingParams, JavascriptRenderingOptions, ProgrammableBrowserOptions, ProgrammableBrowserAction, RetryConfig
def complex_request(url: str) -> dict:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            wait_for_selector='#app-loaded',
            programmable_browser=ProgrammableBrowserOptions(
                actions=[
                    ProgrammableBrowserAction(type='click', selector='#accept-cookies', wait_millis=1000),
                    ProgrammableBrowserAction(type='execute_js', javascript='document.querySelector("#load-more").click();', wait_millis=2000)
                ]
            )
        ),
        use_residential_proxy=True,
        retry_config=RetryConfig(
            num_retries=5,
            success_status_codes=[200],
            success_selector='#all-content-loaded'
        ),
        timeout_millis=60000,
        response_type='json'
    )
    response = client.scrape(options)
    return response.json()
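Because this request combines JS rendering, retries, and a residential proxy, it can take up to the full 60-second timeout. A hypothetical invocation that accounts for failure (the exact exception type depends on the client, so a catch-all is used here for illustration):

try:
    data = complex_request('https://example.com/app')  # placeholder URL
    print(data)
except Exception as exc:
    print(f'Scrape failed after retries: {exc}')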
These examples showcase the main features of the GetScraping API: basic requests, JavaScript rendering, request interception, programmable browser actions, proxy usage, cookie handling, custom success criteria, and combining several features in a single request. They also demonstrate using BeautifulSoup to parse HTML responses.
Remember to install the necessary packages if you haven't already:
pip install getscraping beautifulsoup4