docs: add GCP Cloud Run deployment guide #1167

Merged
merged 8 commits into from
Apr 24, 2025
55 changes: 55 additions & 0 deletions docs/deployment/code_examples/google/cloud_run_example.py
@@ -0,0 +1,55 @@
# mypy: disable-error-code="misc"
import json
import os

import uvicorn
from litestar import Litestar, get

from crawlee import service_locator
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# highlight-start
# Disable writing storage data to the file system
configuration = service_locator.get_configuration()
configuration.persist_storage = False
configuration.write_metadata = False
# highlight-end


@get('/')
async def main() -> str:
"""The crawler entry point that will be called when the HTTP endpoint is accessed."""
crawler = PlaywrightCrawler(
headless=True,
max_requests_per_crawl=10,
browser_type='firefox',
)

@crawler.router.default_handler
async def default_handler(context: PlaywrightCrawlingContext) -> None:
"""Default request handler that processes each page during crawling."""
context.log.info(f'Processing {context.request.url} ...')
title = await context.page.query_selector('title')
await context.push_data(
{
'url': context.request.loaded_url,
'title': await title.inner_text() if title else None,
}
)

await context.enqueue_links()

await crawler.run(['https://crawlee.dev'])

data = await crawler.get_data()

# Return the results as JSON to the client
return json.dumps(data.items)


# Initialize the Litestar app with our route handler
app = Litestar(route_handlers=[main])

# Start the Uvicorn server using the `PORT` environment variable provided by GCP
# This is crucial - Cloud Run expects your app to listen on this specific port
uvicorn.run(app, host='0.0.0.0', port=int(os.environ.get('PORT', '8080'))) # noqa: S104 # Use all interfaces in a container, safely
10 changes: 6 additions & 4 deletions docs/deployment/google_cloud.mdx
@@ -1,7 +1,7 @@
---
id: gcp-functions
title: Deploy to GCP Cloud Functions
description: Prepare your crawler to run in Cloud functions on Google Cloud Platform
id: gcp-cloud-run-functions
title: Cloud Run functions
description: Prepare your crawler to run in Cloud Run functions on Google Cloud Platform.
---

import ApiLink from '@site/src/components/ApiLink';
@@ -10,11 +10,13 @@ import CodeBlock from '@theme/CodeBlock';

import GoogleFunctions from '!!raw-loader!./code_examples/google/google_example.py';

[Google Cloud Run Functions](https://cloud.google.com/functions) is a serverless execution environment for running simple HTTP-based web scrapers. This service is best suited for lightweight crawlers that don't require browser rendering capabilities and can be executed via HTTP requests.

## Updating the project

For the project foundation, use <ApiLink to="class/BeautifulSoupCrawler">BeautifulSoupCrawler</ApiLink> as described in this [example](../examples/beautifulsoup-crawler).

Add [functions-framework](https://pypi.org/project/functions-framework/) to your dependencies file `requirements.txt`. If you're using a project manager like `poetry` or `uv`, export your dependencies to `requirements.txt`.
Add [`functions-framework`](https://pypi.org/project/functions-framework/) to your dependencies file `requirements.txt`. If you're using a project manager like `poetry` or `uv`, export your dependencies to `requirements.txt`.

Update the project code to make it compatible with Cloud Functions and return data in JSON format. Also add an entry point that Cloud Functions will use to run the project.
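
For illustration, here is a minimal sketch of what such an entry point can look like, wrapping a `BeautifulSoupCrawler` with the [`functions-framework`](https://pypi.org/project/functions-framework/) HTTP decorator. The names `run_crawler` and `crawlee_run` are illustrative, not prescribed; the complete example embedded below comes from `google_example.py`.

```python
import asyncio
import json

import functions_framework
from flask import Request, Response

from crawlee import service_locator
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext

# Cloud Run functions provide a read-only file system, so keep storage in memory.
configuration = service_locator.get_configuration()
configuration.persist_storage = False
configuration.write_metadata = False


async def run_crawler() -> str:
    """Run a single crawl and return the scraped items as a JSON string."""
    crawler = BeautifulSoupCrawler(max_requests_per_crawl=10)

    @crawler.router.default_handler
    async def default_handler(context: BeautifulSoupCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url} ...')
        await context.push_data(
            {
                'url': context.request.url,
                'title': context.soup.title.string if context.soup.title else None,
            }
        )
        await context.enqueue_links()

    await crawler.run(['https://crawlee.dev'])
    data = await crawler.get_data()
    return json.dumps(data.items)


@functions_framework.http
def crawlee_run(request: Request) -> Response:
    """HTTP entry point registered with Cloud Run functions."""
    return Response(response=asyncio.run(run_crawler()), mimetype='application/json')
```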

51 changes: 51 additions & 0 deletions docs/deployment/google_cloud_run.mdx
@@ -0,0 +1,51 @@
---
id: gcp-cloud-run
title: Cloud Run
description: Prepare your crawler to run in Cloud Run on Google Cloud Platform.
---

import ApiLink from '@site/src/components/ApiLink';

import CodeBlock from '@theme/CodeBlock';

import GoogleCloudRun from '!!raw-loader!./code_examples/google/cloud_run_example.py';


[Google Cloud Run](https://cloud.google.com/run) is a container-based serverless platform that allows you to run web crawlers with headless browsers. This service is recommended when your Crawlee applications need browser rendering capabilities, require more granular control, or have complex dependencies that aren't supported by [Cloud Functions](./gcp-cloud-run-functions).

GCP Cloud Run lets you deploy Docker containers, giving you full control over your environment and the freedom to use any web server framework of your choice - unlike Cloud Functions, which are limited to [Flask](https://flask.palletsprojects.com/en/stable/).

## Preparing the project

We'll prepare our project using [Litestar](https://litestar.dev/) and the [Uvicorn](https://www.uvicorn.org/) web server. The HTTP server handler will wrap the crawler and communicate with clients. Because the Cloud Run platform sees only an opaque Docker container, we have to provide this HTTP layer ourselves.

:::info

GCP passes your container an environment variable called `PORT` - your HTTP server is expected to listen on this port, as it's the one GCP exposes to the outside world.

:::

<CodeBlock className="language-python">
{GoogleCloudRun.replace(/^.*?\n/, '')}
</CodeBlock>
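
Once the container is running - locally during development, or on Cloud Run after the deployment step below - the endpoint can be exercised with any HTTP client. Below is a hypothetical sketch using `httpx`; the URL is a placeholder (use `http://localhost:8080/` locally, or the URL printed by `gcloud run deploy` once deployed and public).

```python
import httpx

# Placeholder - use http://localhost:8080/ locally, or the service URL
# printed by `gcloud run deploy` once the app is deployed (and public).
SERVICE_URL = 'https://crawlee-example-abc123-uc.a.run.app/'

# A crawl of up to 10 pages with a headless browser can take a while,
# so allow a generous client-side timeout.
response = httpx.get(SERVICE_URL, timeout=300)
response.raise_for_status()

# The handler returns the dataset items serialized as JSON.
for item in response.json():
    print(item['url'], '-', item['title'])
```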


:::tip

Always make sure to keep all the crawling logic inside the request handler - as with other FaaS services, your request handlers have to be **stateless**.

:::
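
As a compressed illustration of the same point (the embedded example above already follows this pattern; names here are illustrative):

```python
from litestar import Litestar, get

from crawlee.crawlers import PlaywrightCrawler

# Anti-pattern: a crawler created at import time would be reused by every
# request served by the same container instance.
# crawler = PlaywrightCrawler(headless=True)


@get('/')
async def handler() -> str:
    # Stateless: build the crawler inside the handler so every invocation
    # starts from scratch instead of reusing objects created at import time.
    crawler = PlaywrightCrawler(headless=True, max_requests_per_crawl=10)
    await crawler.run(['https://crawlee.dev'])
    data = await crawler.get_data()
    return str(len(data.items))


app = Litestar(route_handlers=[handler])
```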

## Deploying to Google Cloud Platform

Now, we’re ready to deploy! If you have initialized your project using `uvx crawlee create`, the initialization script has prepared a Dockerfile for you.

All you have to do now is run `gcloud run deploy` in your project folder (the one containing your Dockerfile). The gcloud CLI will ask you a few questions, such as which region you want to deploy your application to and whether you want to make it public or private.

After answering those questions, you should be able to see your application in the GCP dashboard and run it using the link you find there.

:::tip

If the first execution of your newly created Cloud Run service fails, try editing the service configuration - mainly setting the available memory to 1 GiB or more and updating the request timeout according to the size of the website you are scraping.

:::
1 change: 1 addition & 0 deletions pyproject.toml
@@ -235,6 +235,7 @@ module = [
"flask", # Example code shows deploy on Google Cloud.
"functions_framework", # Example code shows deploy on Google Cloud.
"jaro", # Untyped and stubs not available
"litestar", # Example code shows deploy on Google Cloud Run.
"loguru", # Example code shows integration of loguru and crawlee for JSON logging.
"sklearn.linear_model", # Untyped and stubs not available
"cookiecutter.*", # Untyped and stubs not available
4 changes: 2 additions & 2 deletions website/sidebars.js
@@ -64,8 +64,8 @@ module.exports = {
type: 'category',
label: 'Deploy to Google Cloud',
items: [
'deployment/gcp-functions',
// 'deployment/gcp-browsers',
'deployment/gcp-cloud-run-functions',
'deployment/gcp-cloud-run',
],
},
],