diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..8476c21 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,34 @@ +name: Docker Build & Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + uses: docker/build-push-action@v5 + with: + context: . + load: true + tags: github-dorks:test + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Test Docker image + run: | + # Test the version flag with version flag + docker run github-dorks:test -v + + - name: Verify image size + run: docker image ls github-dorks:test diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..038f261 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,28 @@ +name: Lint (flake8) + +on: + push: + branches: [ "**" ] + pull_request: + branches: [ "**" ] + +jobs: + flake8: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + + - name: Install flake8 + run: | + python -m pip install --upgrade pip + pip install flake8 + + - name: Run flake8 + run: | + flake8 . diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0d8ecb9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Use Python 3.8 as base - this version has good compatibility with older packages +FROM python:3.8-slim + +# Set working directory +WORKDIR /app + +# Install git (needed for pip install from git repos) +RUN apt-get update && \ + apt-get install -y git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Copy only the necessary files +COPY github-dork.py /app/ +COPY github-dorks.txt /app/ +COPY setup.py /app/ +COPY README.md /app/ +COPY requirements.txt /app/ + +# Install dependencies +# Using the specific version of github3.py that's known to work +RUN pip install --no-cache-dir github3.py==1.0.0a2 feedparser==6.0.2 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 + +# Create volume for potential output files +VOLUME ["/app/output"] + +ENTRYPOINT ["python", "github-dork.py"] \ No newline at end of file diff --git a/README.md b/README.md index 3364b8f..eb36e4f 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,79 @@ +[![Docker Build & Test](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml/badge.svg)](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml) + # Github Dorks -[Github search](https://github.com/search) is quite powerful and useful feature and can be used to search sensitive data on the repositories. Collection of github dorks that can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is supposed to be useful for assessing security and performing pen-testing of systems. -### GitHub Dork Search Tool -[github-dork.py](github-dork.py) is a simple python tool that can search through your repository or your organization/user repositories. Its not a perfect tool at the moment but provides a basic functionality to automate the search on your repositories against the dorks specified in text file. +[Github Search](https://github.com/search) is a quite powerful and useful feature that can be used to search for sensitive data on repositories. Collection of Github dorks can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is supposed to be useful for assessing security and performing pen-testing of systems. + +## GitHub Dork Search Tool + +[github-dork.py](github-dork.py) is a simple python tool that can search through your repository or your organization/user repositories. It's not a perfect tool at the moment but provides basic functionality to automate the search on your repositories against the dorks specified in the text file. + +### Installation -#### Installation This tool uses [github3.py](https://github.com/sigmavirus24/github3.py) to talk with GitHub Search API. Clone this repository and run: + +```shell +pip install . +``` + +### Docker Installation + +You can also run github-dorks using Docker for a consistent environment: + ```shell -pip install -r requirements.txt +# Build the Docker image +docker build -t github-dorks . + +# Run with a GitHub token (recommended) +docker run -e GH_TOKEN=your_github_token github-dorks -u someuser + +# Run with username/password +docker run -e GH_USER=your_username -e GH_PWD=your_password github-dorks -u someuser + +# Save results to a CSV file +docker run -v $(pwd)/output:/app/output -e GH_TOKEN=your_github_token github-dorks -u someuser -o /app/output/results.csv ``` -#### Usage +### Usage + ``` -GH_USER - Environment variable to specify github user -GH_PWD - Environment variable to specify password -GH_TOKEN - Environment variable to specify github token +GH_USER - Environment variable to specify Github user +GH_PWD - Environment variable to specify a password +GH_TOKEN - Environment variable to specify Github token GH_URL - Environment variable to specify GitHub Enterprise base URL ``` Some example usages are listed below: ```shell -python github-dork.py -r techgaun/github-dorks # search single repo +github-dork.py -r techgaun/github-dorks # search a single repo -python github-dork.py -u techgaun # search all repos of user +github-dork.py -u techgaun # search all repos of a user -python github-dork.py -u dev-nepal # search all repos of an organization +github-dork.py -u dev-nepal # search all repos of an organization -GH_USER=techgaun GH_PWD= python github-dork.py -u dev-nepal # search as authenticated user +GH_USER=techgaun GH_PWD= github-dork.py -u dev-nepal # search as authenticated user -GH_TOKEN= python github-dork.py -u dev-nepal # search using auth token +GH_TOKEN= github-dork.py -u dev-nepal # search using auth token -GH_URL=https://github.example.com python github-dork.py -u dev-nepal # search a GitHub Enterprise instance +GH_URL=https://github.example.com github-dork.py -u dev-nepal # search a GitHub Enterprise instance ``` -#### Limitations +### Limitations - Authenticated requests get a higher rate limit. But, since this tool waits for the api rate limit to be reset (which is usually less than a minute), it can be slightly slow. - Output formatting is not great. PR welcome - ~~Handle rate limit and retry. PR welcome~~ ### Contribution -Please consider contributing the dorks that can reveal potentially sensitive information in github. + +Please consider contributing dorks that can reveal potentially sensitive information on Github. ### List of Dorks -I am not categorizing at the moment. Instead I am going to just the list of dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). + +I am not categorizing at the moment. Instead, I am going to just the list of dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). Dork | Description ------------------------------------------------|-------------------------------------------------------------------------- @@ -127,3 +154,10 @@ filename:.remote-sync.json | Created by remote-sync for Ato filename:sftp.json path:.vscode | Created by vscode-sftp for VSCode, contains SFTP/SSH server details and credentails filename:sftp-config.json | Created by SFTP for Sublime Text, contains FTP/FTPS or SFTP/SSH server details and credentials filename:WebServers.xml | Created by Jetbrains IDEs, contains webserver credentials with encoded passwords ([not encrypted!](https://intellij-support.jetbrains.com/hc/en-us/community/posts/207074025/comments/207034775)) +"api_hash" "api_id" | Telegram API token +"https://hooks.slack.com/services/" | Slack services URL often have secret API token as a suffix +filename:github-recovery-codes.txt | GitHub recovery key +filename:gitlab-recovery-codes.txt | GitLab recovery key +filename:discord_backup_codes.txt | Discord recovery key +extension:yaml cloud.redislabs.com | Redis credentials provided by Redis Labs found in a YAML file +extension:json cloud.redislabs.com | Redis credentials provided by Redis Labs found in a JSON file diff --git a/github-dork.py b/github-dork.py index 1ca7274..2941b04 100644 --- a/github-dork.py +++ b/github-dork.py @@ -7,7 +7,7 @@ import time import feedparser from copy import copy -from sys import stderr +from sys import stderr, prefix gh_user = os.getenv('GH_USER', None) gh_pass = os.getenv('GH_PWD', None) @@ -28,7 +28,7 @@ def search_wrapper(gen): yield next(gen) except StopIteration: return - except github.exceptions.ForbiddenError as e: + except github.exceptions.ForbiddenError: search_rate_limit = gh.rate_limit()['resources']['search'] # limit_remaining = search_rate_limit['remaining'] reset_time = search_rate_limit['reset'] @@ -87,7 +87,12 @@ def search(repo_to_search=None, output_filename=None): if gh_dorks_file is None: - gh_dorks_file = 'github-dorks.txt' + for path_prefix in ['.', os.path.join(prefix, 'github-dorks/')]: + filename = os.path.join(path_prefix, 'github-dorks.txt') + if os.path.isfile(filename): + gh_dorks_file = filename + break + if not os.path.isfile(gh_dorks_file): raise Exception('Error, the dorks file path is not valid') if user_to_search: diff --git a/github-dorks.txt b/github-dorks.txt index 8bcd5ec..c5625a6 100644 --- a/github-dorks.txt +++ b/github-dorks.txt @@ -79,4 +79,12 @@ filename:.ftpconfig filename:.remote-sync.json filename:sftp.json path:.vscode filename:WebServers.xml - +filename:jupyter_notebook_config.json +"api_hash" "api_id" +"https://hooks.slack.com/services/" +filename:github-recovery-codes.txt +filename:gitlab-recovery-codes.txt +filename:discord_backup_codes.txt +extension:yaml cloud.redislabs.com +extension:json cloud.redislabs.com +DATADOG_API_KEY language:shell diff --git a/requirements.txt b/requirements.txt index 9b4e16f..cfe346b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ github3.py==1.0.0a2 -feedparser==5.1.3 +feedparser==6.0.2 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..979e932 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +with open('README.md', 'r') as f: + long_description = f.read() + +setup( + name='github-dorks', + version='0.1', + description='Find leaked secrets via github search.', + license='Apache License 2.0', + long_description=long_description, + author='Samar Dhwoj Acharya (@techgaun)', + long_description_content_type='text/markdown', + scripts=['github-dork.py'], + data_files=[('github-dorks', ['github-dorks.txt'])], + install_requires=[ + 'github3.py==4.0.1', + 'feedparser==6.0.2', + ], +)