GetScraping API Examples - Python

First, let's initialize the GetScraping client:

from getscraping import GetScrapingClient
 
client = GetScrapingClient('your_api_key_here')
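
If you prefer not to hard-code your API key, you can read it from an environment variable instead (GETSCRAPING_API_KEY is just a name chosen for this example):

import os
from getscraping import GetScrapingClient

client = GetScrapingClient(os.environ['GETSCRAPING_API_KEY'])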

1. Basic GET request with retry configuration

This example demonstrates a simple GET request with retry logic and a custom timeout.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, RetryConfig
 
def basic_get_request(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        retry_config=RetryConfig(
            num_retries=3,
            success_status_codes=[200, 201]
        ),
        timeout_millis=30000
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.body.get_text()
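
Calling the function is straightforward; the URL below is only a placeholder:

page_text = basic_get_request('https://example.com')
print(page_text[:200])  # preview the first 200 characters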

2. POST request with custom headers and body

This example shows how to make a POST request with custom headers and a JSON body.

import json
 
def post_request_with_headers(url: str, data: dict) -> dict:
    options = GetScrapingParams(
        url=url,
        method='POST',
        headers={
            'Content-Type': 'application/json',
            'Custom-Header': 'CustomValue'
        },
        body=json.dumps(data),
        omit_default_headers=True
    )
    
    response = client.scrape(options)
    return response.json()
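
For instance, you might submit a small JSON payload (the endpoint and fields here are hypothetical):

result = post_request_with_headers(
    'https://example.com/api/items',
    {'name': 'widget', 'quantity': 3}
)
print(result)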

3. Using JavaScript rendering with wait conditions

This example demonstrates how to use JavaScript rendering with a wait condition for a specific selector.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, JavascriptRenderingOptions
 
def render_js_with_wait(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            wait_for_selector='#dynamic-content',
            wait_millis=5000
        )
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    dynamic_content = soup.select_one('#dynamic-content')
    return dynamic_content.decode_contents() if dynamic_content else ''

4. Intercepting a specific request

This example shows how to intercept a specific API request during page load.

from getscraping import GetScrapingParams, JavascriptRenderingOptions, InterceptRequestParams
 
def intercept_request(url: str) -> dict:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            intercept_request=InterceptRequestParams(
                intercepted_url_regex=r'.*api/data.*',
                intercepted_url_method='POST',
                return_json=True
            )
        )
    )
    
    response = client.scrape(options)
    return response.json()

5. Using programmable browser actions

This example demonstrates how to use programmable browser actions to interact with the page.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, JavascriptRenderingOptions, ProgrammableBrowserOptions, ProgrammableBrowserAction
 
def programmable_browser_actions(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            programmable_browser=ProgrammableBrowserOptions(
                actions=[
                    ProgrammableBrowserAction(
                        type='click',
                        selector='#load-more-button',
                        wait_millis=2000
                    ),
                    ProgrammableBrowserAction(
                        type='scroll',
                        selector='body',
                        wait_millis=1000
                    ),
                    ProgrammableBrowserAction(
                        type='execute_js',
                        javascript='window.scrollTo(0, document.body.scrollHeight);',
                        wait_millis=2000
                    )
                ]
            )
        )
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.select_one('.content')
    return content.get_text() if content else ''

6. Using different proxy types

This example shows how to use different proxy types for your requests.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams
 
def proxy_request(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        use_residential_proxy=True
        # Alternatively:
        # use_isp_proxy=True,
        # use_mobile_proxy=True,
        # use_own_proxy='http://username:password@proxy.example.com:8080',
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.body.decode_contents()
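
If you switch proxy types often, a small dispatch helper keeps the flags in one place. This is a sketch of our own; PROXY_FLAGS and proxy_request_as are names introduced here, not part of the SDK:

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams

# Map a friendly name to the corresponding GetScrapingParams keyword argument.
PROXY_FLAGS = {
    'residential': {'use_residential_proxy': True},
    'isp': {'use_isp_proxy': True},
    'mobile': {'use_mobile_proxy': True},
}

def proxy_request_as(url: str, proxy_type: str = 'residential') -> str:
    options = GetScrapingParams(url=url, method='GET', **PROXY_FLAGS[proxy_type])
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.body.decode_contents()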

7. Handling cookies

This example demonstrates how to handle cookies in your requests and responses.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams
from typing import Dict, List, Union
 
def cookie_handling(url: str, initial_cookies: List[str]) -> Dict[str, Union[str, List[str]]]:
    options = GetScrapingParams(
        url=url,
        method='GET',
        cookies=initial_cookies
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Note: depending on the underlying HTTP client, the 'set-cookie' value may be
    # a single comma-joined string rather than a list.
    new_cookies = response.headers.get('set-cookie', [])
    return {'content': soup.body.get_text(), 'cookies': new_cookies}
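
To carry cookies across several requests, feed the cookies returned by one call into the next. A minimal sketch built on the function above, assuming the cookies parameter accepts the same strings the set-cookie header returns:

def fetch_with_session(urls: List[str]) -> List[str]:
    cookies: List[str] = []
    pages: List[str] = []
    for url in urls:
        result = cookie_handling(url, cookies)
        pages.append(result['content'])
        new_cookies = result['cookies']
        # Normalize to a list before reusing the cookies on the next request.
        if new_cookies:
            cookies = new_cookies if isinstance(new_cookies, list) else [new_cookies]
    return pages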

8. Using custom success criteria

This example shows how to use custom success criteria for your requests.

from bs4 import BeautifulSoup
from getscraping import GetScrapingParams, RetryConfig
 
def custom_success_criteria(url: str) -> str:
    options = GetScrapingParams(
        url=url,
        method='GET',
        retry_config=RetryConfig(
            num_retries=3,
            success_status_codes=[200],
            success_selector='#content-loaded'
        )
    )
    
    response = client.scrape(options)
    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.select_one('#content-loaded')
    return content.get_text() if content else ''

9. Combining multiple features

This example demonstrates how to combine multiple features of the GetScraping API in a single request.

from getscraping import GetScrapingParams, JavascriptRenderingOptions, ProgrammableBrowserOptions, ProgrammableBrowserAction, RetryConfig
 
def complex_request(url: str) -> dict:
    options = GetScrapingParams(
        url=url,
        method='GET',
        js_rendering_options=JavascriptRenderingOptions(
            render_js=True,
            wait_for_selector='#app-loaded',
            programmable_browser=ProgrammableBrowserOptions(
                actions=[
                    ProgrammableBrowserAction(
                        type='click',
                        selector='#accept-cookies',
                        wait_millis=1000
                    ),
                    ProgrammableBrowserAction(
                        type='execute_js',
                        javascript='document.querySelector("#load-more").click();',
                        wait_millis=2000
                    )
                ]
            )
        ),
        use_residential_proxy=True,
        retry_config=RetryConfig(
            num_retries=5,
            success_status_codes=[200],
            success_selector='#all-content-loaded'
        ),
        timeout_millis=60000,
        response_type='json'
    )
    
    response = client.scrape(options)
    return response.json()
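
In production you will usually want to guard the call. The SDK's exact exception types may vary by version, so this sketch catches broadly:

def safe_complex_request(url: str) -> dict:
    try:
        return complex_request(url)
    except Exception as exc:  # narrow this to the SDK's exception types if known
        print(f'Scrape failed for {url}: {exc}')
        return {}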

These examples cover the main features of the GetScraping API: basic requests, JavaScript rendering, request interception, programmable browser actions, proxy selection, cookie handling, custom success criteria, and combinations of these in a single request. They also show how to parse HTML responses with BeautifulSoup.

Remember to install the necessary packages if you haven't already:

pip install getscraping beautifulsoup4