Skip to content

feat(github): add repository and organization scoping support #8329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions prowler/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ All notable changes to the **Prowler SDK** are documented in this file.
- CIS 4.0 for the Azure provider [(#7782)](https://github.com/prowler-cloud/prowler/pull/7782)
- `vm_desired_sku_size` check for Azure provider [(#8191)](https://github.com/prowler-cloud/prowler/pull/8191)
- `vm_scaleset_not_empty` check for Azure provider [(#8192)](https://github.com/prowler-cloud/prowler/pull/8192)
- GitHub repository and organization scoping support with `--repository` and `--organization` flags [(#8329)](https://github.com/prowler-cloud/prowler/pull/8329)

### Changed
- Handle some AWS errors as warnings instead of errors [(#8347)](https://github.com/prowler-cloud/prowler/pull/8347)
Expand Down
2 changes: 2 additions & 0 deletions prowler/providers/common/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ def init_global_provider(arguments: Namespace) -> None:
github_app_id=arguments.github_app_id,
mutelist_path=arguments.mutelist_file,
config_path=arguments.config_file,
repositories=arguments.repository,
organizations=arguments.organization,
)
elif "iac" in provider_class_name.lower():
provider_class(
Expand Down
30 changes: 28 additions & 2 deletions prowler/providers/github/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class GithubProvider(Provider):
_audit_config (dict): The audit configuration for the provider.
_fixer_config (dict): The fixer configuration for the provider.
_mutelist (Mutelist): The mutelist for the provider.
_repositories (list): List of repository names to scan in 'owner/repo-name' format.
_organizations (list): List of organization or user names to scan repositories for.
audit_metadata (Audit_Metadata): The audit metadata for the provider.
"""

Expand All @@ -91,6 +93,8 @@ class GithubProvider(Provider):
_identity: GithubIdentityInfo
_audit_config: dict
_mutelist: Mutelist
_repositories: list
_organizations: list
audit_metadata: Audit_Metadata

def __init__(
Expand All @@ -107,6 +111,8 @@ def __init__(
fixer_config: dict = {},
mutelist_path: str = None,
mutelist_content: dict = None,
repositories: list = None,
organizations: list = None,
):
"""
GitHub Provider constructor
Expand All @@ -122,9 +128,15 @@ def __init__(
fixer_config (dict): Fixer configuration content.
mutelist_path (str): Path to the mutelist file.
mutelist_content (dict): Mutelist content.
repositories (list): List of repository names to scan in 'owner/repo-name' format.
organizations (list): List of organization or user names to scan repositories for.
"""
logger.info("Instantiating GitHub Provider...")

# Set repositories and organizations for scoping
self._repositories = repositories or []
self._organizations = organizations or []

self._session = GithubProvider.setup_session(
personal_access_token,
oauth_app_token,
Expand Down Expand Up @@ -213,6 +225,20 @@ def mutelist(self) -> GithubMutelist:
"""
return self._mutelist

@property
def repositories(self) -> list:
    """
    Return the repositories this provider is scoped to.

    The value is the list stored in ``_repositories`` by the constructor
    (entries are documented there as 'owner/repo-name' strings); it is an
    empty list when no repository scoping was requested.
    """
    return self._repositories

@property
def organizations(self) -> list:
    """
    Return the organizations (or user names) this provider is scoped to.

    The value is the list stored in ``_organizations`` by the constructor;
    it is an empty list when no organization scoping was requested.
    """
    return self._organizations

@staticmethod
def setup_session(
personal_access_token: str = None,
Expand Down Expand Up @@ -295,7 +321,7 @@ def setup_session(

except Exception as error:
logger.critical(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}] -- {error}"
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
raise GithubSetUpSessionError(
original_exception=error,
Expand Down Expand Up @@ -344,7 +370,7 @@ def setup_identity(

except Exception as error:
logger.critical(
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}] -- {error}"
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
raise GithubSetUpIdentityError(
original_exception=error,
Expand Down
16 changes: 16 additions & 0 deletions prowler/providers/github/lib/arguments/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,19 @@ def init_parser(self):
default=None,
metavar="GITHUB_APP_KEY",
)

github_scoping_subparser = github_parser.add_argument_group("Scan Scoping")
github_scoping_subparser.add_argument(
"--repository",
nargs="*",
help="Repository name(s) to scan in 'owner/repo-name' format",
default=None,
metavar="REPOSITORY",
)
github_scoping_subparser.add_argument(
"--organization",
nargs="*",
help="Organization or user name(s) to scan repositories for",
default=None,
metavar="ORGANIZATION",
)
52 changes: 52 additions & 0 deletions prowler/providers/github/lib/service/service.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import github
from github import Auth, Github, GithubIntegration
from github.GithubRetry import GithubRetry

Expand All @@ -11,6 +12,7 @@ def __init__(
service: str,
provider: GithubProvider,
):
self.provider = provider
self.clients = self.__set_clients__(
provider.session,
)
Expand Down Expand Up @@ -41,3 +43,53 @@ def __set_clients__(self, session):
f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
)
return clients

def _handle_github_api_error(
    self, error, context: str, item_name: str, reraise_rate_limit: bool = False
):
    """
    Log a GitHub API failure in a uniform way.

    Args:
        error: The exception that was caught by the caller.
        context: Short description of what was being done (e.g. "accessing
            organization"); used in the rate-limit message.
        item_name: Name of the repository/organization/user involved.
        reraise_rate_limit: When True, a rate-limit error is re-raised after
            being logged so the caller can abort the scan.

    Raises:
        github.RateLimitExceededException: Only when ``reraise_rate_limit`` is
            True and ``error`` is a rate-limit error.
    """
    # RateLimitExceededException derives from GithubException, so it has to be
    # tested first.
    if isinstance(error, github.RateLimitExceededException):
        logger.error(f"Rate limit exceeded while {context} '{item_name}': {error}")
        if reraise_rate_limit:
            # Raise the object explicitly: a bare `raise` only works while the
            # caller is still inside its `except` block.
            raise error
        return

    if isinstance(error, github.GithubException):
        # Use the HTTP status carried by the exception instead of searching the
        # rendered message, which also matches names/messages containing the
        # digits "404" or "403".
        status = getattr(error, "status", None)
        if status == 404:
            logger.warning(f"'{item_name}' not found or not accessible")
        elif status == 403:
            logger.warning(
                f"Access denied to '{item_name}' - insufficient permissions"
            )
        else:
            logger.error(f"GitHub API error for '{item_name}': {error}")
        return

    # Exceptions that were never raised carry no traceback; do not crash the
    # error handler itself in that case.
    traceback = error.__traceback__
    line_number = traceback.tb_lineno if traceback is not None else "?"
    logger.error(f"{error.__class__.__name__}[{line_number}]: {error}")

def _get_organization_or_user(self, client, name: str):
    """
    Resolve ``name`` as an organization, falling back to a user account.

    Args:
        client: Authenticated PyGithub client used for the lookups.
        name: Organization or user login to resolve.

    Returns:
        tuple: ``(repos, kind)`` where ``kind`` is "organization", "user" or
        "none". ``repos`` is whatever ``get_repos()`` returns for the resolved
        entity, or an empty list when ``kind`` is "none".
    """
    try:
        return client.get_organization(name).get_repos(), "organization"
    except github.GithubException as error:
        # Only a genuine HTTP 404 means "not an organization"; compare the
        # status code rather than searching the message for the digits.
        if getattr(error, "status", None) != 404:
            self._handle_github_api_error(error, "accessing organization", name)
            return [], "none"
    except Exception as error:
        self._handle_github_api_error(error, "accessing organization", name)
        return [], "none"

    logger.info(f"'{name}' not found as organization, trying as user...")
    try:
        return client.get_user(name).get_repos(), "user"
    except Exception as user_error:
        # GitHub API errors and unexpected errors are reported identically, so
        # one handler covers both.
        self._handle_github_api_error(user_error, "accessing", f"{name} as user")
        return [], "none"
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional

import github
from pydantic.v1 import BaseModel

from prowler.lib.logger import logger
Expand All @@ -12,29 +13,135 @@ def __init__(self, provider):
self.organizations = self._list_organizations()

def _list_organizations(self):
    """
    List organizations based on provider scoping configuration.

    Scoping behavior:
    - No scoping: Returns all organizations for authenticated user
    - Organization scoping: Returns only specified organizations
      Example: --organization org1 org2
    - Repository + Organization scoping: Returns specified organizations + repository owners
      Example: --repository owner1/repo1 --organization org2
    - Repository only: Returns empty (no organization checks)
      Example: --repository owner1/repo1

    Returns:
        dict: Dictionary of organization ID to Org objects

    Raises:
        github.RateLimitExceededException: When API rate limits are exceeded.
            Every other error is logged and the organizations collected so
            far are returned.
    """
    logger.info("Organization - Listing Organizations...")
    organizations = {}
    org_names_to_check = set()

    try:
        # The scoped names do not depend on the client, so build them once
        # instead of once per client.
        if self.provider.organizations:
            org_names_to_check.update(self.provider.organizations)

        # If repositories are specified without organizations, don't perform organization checks
        # Only add repository owners to organization checks if organizations are also specified
        if self.provider.repositories and self.provider.organizations:
            for repo_name in self.provider.repositories:
                if "/" in repo_name:
                    owner_name = repo_name.split("/")[0]
                    # "/repo" yields an empty owner, which cannot be looked up.
                    if not owner_name:
                        continue
                    org_names_to_check.add(owner_name)
                    logger.info(
                        f"Adding owner '{owner_name}' from repository '{repo_name}' to organization check list"
                    )

        for client in self.clients:
            if org_names_to_check:
                # If specific organizations/owners are specified, check them directly
                for org_name in sorted(org_names_to_check):
                    self._add_scoped_organization(client, org_name, organizations)
            elif not self.provider.repositories:
                # Default behavior: get all organizations the user is a member of
                # Only when no repositories are specified
                for org in client.get_user().get_orgs():
                    self._process_organization(org, organizations)

    except github.RateLimitExceededException as error:
        logger.error(f"GitHub API rate limit exceeded: {error}")
        raise  # Re-raise rate limit errors as they need special handling
    except github.GithubException as error:
        logger.error(f"GitHub API error while listing organizations: {error}")
    except Exception as error:
        logger.error(
            f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
        )
    return organizations

def _add_scoped_organization(self, client, org_name, organizations):
    """Look up one scoped name as an organization (falling back to a user) and record it."""
    try:
        org = client.get_organization(org_name)
        self._process_organization(org, organizations)
    except github.RateLimitExceededException as error:
        # Must precede the GithubException handler: the rate-limit exception
        # is a subclass and would otherwise be logged and swallowed.
        logger.error(
            f"Rate limit exceeded while processing organization '{org_name}': {error}"
        )
        raise  # Re-raise rate limit errors as they need special handling
    except github.GithubException as org_error:
        # Compare the HTTP status instead of searching the message text.
        status = getattr(org_error, "status", None)
        if status == 404:
            # If organization fails, try as a user (personal account)
            logger.info(
                f"'{org_name}' not found as organization, trying as user..."
            )
            self._add_user_as_organization(client, org_name, organizations)
        elif status == 403:
            logger.warning(
                f"Access denied to organization '{org_name}' - insufficient permissions"
            )
        else:
            logger.error(
                f"GitHub API error accessing organization '{org_name}': {org_error}"
            )
    except Exception as error:
        logger.error(
            f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
        )

def _add_user_as_organization(self, client, org_name, organizations):
    """Record a personal account as a pseudo-organization so checks can run against it."""
    try:
        user = client.get_user(org_name)
        # Create a pseudo-organization for the user
        organizations[user.id] = Org(
            id=user.id,
            name=user.login,
            mfa_required=None,  # Users don't have MFA requirements like orgs
        )
        logger.info(f"Added user '{user.login}' as organization for checks")
    except github.RateLimitExceededException as error:
        logger.error(
            f"Rate limit exceeded while processing organization '{org_name}': {error}"
        )
        raise  # Re-raise rate limit errors as they need special handling
    except github.GithubException as user_error:
        status = getattr(user_error, "status", None)
        if status == 404:
            logger.warning(f"'{org_name}' not found as organization or user")
        elif status == 403:
            logger.warning(
                f"Access denied to '{org_name}' - insufficient permissions"
            )
        else:
            logger.warning(
                f"GitHub API error accessing '{org_name}' as user: {user_error}"
            )
    except Exception as user_error:
        logger.error(
            f"{user_error.__class__.__name__}[{user_error.__traceback__.tb_lineno}]: {user_error}"
        )

def _process_organization(self, org, organizations):
    """
    Record one organization in ``organizations``, keyed by its id.

    Reading the two-factor requirement may fail; in that case the failure is
    logged and the organization is stored with ``mfa_required=None``.
    """
    try:
        mfa_flag = org.two_factor_requirement_enabled
    except Exception as error:
        logger.error(
            f"{error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}"
        )
        mfa_flag = None

    entry = Org(
        id=org.id,
        name=org.login,
        mfa_required=mfa_flag,
    )
    organizations[org.id] = entry


class Org(BaseModel):
"""Model for Github Organization"""
Expand Down
Loading