Commit 457a2aa

feat: add integration for the api

1 parent e6f18bc

File tree

10 files changed: +80 -54 lines changed

Binary file (2.71 KB) not shown.
Binary file (3.66 KB) not shown.

examples/.env

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+SCRAPEGRAPH_API_KEY="***REMOVED***"
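
The example script below consumes this key at runtime. A minimal sketch of the lookup, assuming python-dotenv's default search finds this file relative to the running script:

    from dotenv import load_dotenv
    import os

    load_dotenv()  # walks up from the script's directory looking for a .env file
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")  # None if the variable is absent
    assert api_key, "Set SCRAPEGRAPH_API_KEY in examples/.env"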

examples/scrape_example.py

Lines changed: 10 additions & 33 deletions
@@ -1,39 +1,16 @@
-import os
-from dotenv import load_dotenv
 from scrapegraphaiapisdk.scrape import scrape
-from pydantic import BaseModel
-from typing import List
-
-# Load environment variables from .env file
-load_dotenv()
-
-class Product(BaseModel):
-    name: str
-    price: float
-    description: str
-
-class ProductList(BaseModel):
-    products: List[Product]
+from dotenv import load_dotenv  # Import load_dotenv
+import os  # Import os to access environment variables
+import json  # Import json for beautifying output
 
 def main():
-    # Get API key from environment variables
+    """Main function to execute the scraping process."""
+    load_dotenv()
     api_key = os.getenv("SCRAPEGRAPH_API_KEY")
-
-    # URL to scrape
-    url = "https://example.com/products"
-
-    # Natural language prompt
-    prompt = "Extract all products from this page including their names, prices, and descriptions"
-
-    # Create schema
-    schema = ProductList
-
-    # Make the request
-    try:
-        result = scrape(api_key, url, prompt, schema)
-        print(f"Scraped data: {result}")
-    except Exception as e:
-        print(f"Error occurred: {e}")
+    url = "https://scrapegraphai.com/"
+    prompt = "What does the company do?"
 
+    result = scrape(api_key, url, prompt)
+    print(result)
 if __name__ == "__main__":
-    main()
+    main()
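
The rewritten example imports json "for beautifying output" but never calls it. A minimal sketch of how the result could be pretty-printed, assuming the endpoint returns a JSON string (with a fallback for plain-text responses):

    import json

    result = scrape(api_key, url, prompt)
    try:
        # Pretty-print when the response body is valid JSON
        print(json.dumps(json.loads(result), indent=2))
    except json.JSONDecodeError:
        # Fall back to the raw text otherwise
        print(result)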

pyproject.toml

Lines changed: 2 additions & 1 deletion
@@ -10,7 +10,8 @@ authors = [
 
 dependencies = [
     "requests>=2.32.3",
-    "pydantic>=2.9.2"
+    "pydantic>=2.9.2",
+    "python-dotenv>=1.0.1"
 ]
 
 license = "MIT"

requirements-dev.lock

Lines changed: 11 additions & 0 deletions
@@ -10,6 +10,8 @@
 -e file:.
 alabaster==0.7.16
     # via sphinx
+annotated-types==0.7.0
+    # via pydantic
 astroid==3.3.5
     # via pylint
 babel==2.16.0
@@ -51,14 +53,21 @@ platformdirs==4.3.6
     # via pylint
 pluggy==1.5.0
     # via pytest
+pydantic==2.9.2
+    # via scrapegraphaiapisdk
+pydantic-core==2.23.4
+    # via pydantic
 pygments==2.18.0
     # via furo
     # via sphinx
 pylint==3.3.1
 pytest==8.0.0
     # via pytest-mock
 pytest-mock==3.14.0
+python-dotenv==1.0.1
+    # via scrapegraphaiapisdk
 requests==2.32.3
+    # via scrapegraphaiapisdk
     # via sphinx
 snowballstemmer==2.2.0
     # via sphinx
@@ -89,6 +98,8 @@ tomlkit==0.13.2
     # via pylint
 typing-extensions==4.12.2
     # via astroid
+    # via pydantic
+    # via pydantic-core
     # via pylint
 urllib3==2.2.3
     # via requests

requirements.lock

Lines changed: 21 additions & 0 deletions
@@ -8,3 +8,24 @@
 # with-sources: false
 
 -e file:.
+annotated-types==0.7.0
+    # via pydantic
+certifi==2024.8.30
+    # via requests
+charset-normalizer==3.4.0
+    # via requests
+idna==3.10
+    # via requests
+pydantic==2.9.2
+    # via scrapegraphaiapisdk
+pydantic-core==2.23.4
+    # via pydantic
+python-dotenv==1.0.1
+    # via scrapegraphaiapisdk
+requests==2.32.3
+    # via scrapegraphaiapisdk
+typing-extensions==4.12.2
+    # via pydantic
+    # via pydantic-core
+urllib3==2.2.3
+    # via requests
Binary file (175 Bytes) not shown.
Binary file (2.06 KB) not shown.

scrapegraphaiapisdk/scrape.py

Lines changed: 35 additions & 20 deletions
@@ -1,38 +1,53 @@
-"""
-This module provides a function to scrape and extract structured data from a webpage
-using the ScrapeGraph AI API. It allows specifying a schema for the output structure
-using a Pydantic model.
-"""
-
 from pydantic import BaseModel
 import requests
+import argparse
+from typing import Optional
+import json
+
+class ExampleSchema(BaseModel):
+    """Define an example schema for the output structure, if needed."""
+    name: str
+    description: str
 
-def scrape(api_key: str, url: str, prompt: str, schema: BaseModel) -> str:
+def scrape(api_key: str, url: str, prompt: str, schema: Optional[BaseModel] = None) -> str:
     """Scrape and extract structured data from a webpage using ScrapeGraph AI.
 
     Args:
-        api_key (str): Your ScrapeGraph AI API key
-        url (str): The URL of the webpage to scrape
-        prompt (str): Natural language prompt describing what data to extract
-        schema (BaseModel): Pydantic model defining the output structure.
-            The model will be converted to JSON schema before making the request.
+        api_key (str): Your ScrapeGraph AI API key.
+        url (str): The URL of the webpage to scrape.
+        prompt (str): Natural language prompt describing what data to extract.
+        schema (Optional[BaseModel]): Pydantic model defining the output structure,
+            if provided. The model will be converted to JSON schema before making
+            the request.
 
     Returns:
-        str: Extracted data in JSON format matching the provided schema
+        str: Extracted data in JSON format matching the provided schema.
     """
-    endpoint = "https://api.scrapegraph.ai/v1/scrape"
+    endpoint = "https://sgai-api.onrender.com/api/v1/smartscraper"
     headers = {
-        "Authorization": f"Bearer {api_key}",
+        "accept": "application/json",
+        "SGAI-API-KEY": api_key,
         "Content-Type": "application/json"
     }
 
     payload = {
-        "url": url,
-        "prompt": prompt,
-        "schema": schema.model_json_schema()
+        "website_url": url,
+        "user_prompt": prompt
     }
 
-    response = requests.post(endpoint, headers=headers, json=payload)
-    response.raise_for_status()
+    if schema:
+        payload["schema"] = schema.model_json_schema()
+
+    try:
+        response = requests.post(endpoint, headers=headers, json=payload)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors specifically
+        if response.status_code == 403:
+            return json.dumps({"error": "Access forbidden (403)", "message": "You do not have permission to access this resource."})
+        return json.dumps({"error": "HTTP error occurred", "message": str(http_err), "status_code": response.status_code})
+    except requests.exceptions.RequestException as e:
+        # Handle other request exceptions (e.g., connection errors, timeouts)
+        return json.dumps({"error": "An error occurred", "message": str(e)})
 
     return response.text
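
A minimal usage sketch of the updated signature, reusing the ExampleSchema model this diff adds; the second call exercises the new optional-schema branch, which attaches model_json_schema() to the payload:

    import os
    from dotenv import load_dotenv
    from scrapegraphaiapisdk.scrape import scrape, ExampleSchema

    load_dotenv()
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")

    # Without a schema: the payload carries only website_url and user_prompt
    print(scrape(api_key, "https://scrapegraphai.com/", "What does the company do?"))

    # With a schema: the JSON schema for ExampleSchema is sent under the "schema" key
    print(scrape(api_key, "https://scrapegraphai.com/",
                 "Extract the site name and a one-line description",
                 schema=ExampleSchema))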
