diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..1f487de --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: techgaun diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..98b90f0 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,13 @@ +### Please include all of the following fields when adding dorks/patterns +- Search URL: https://github.com/search?q= +- Number of search results at time of PR: +- Impact of data disclosed (see table below): +- Description of data disclosed: + +| Icon/Name | Description | Examples | +|-----------|---------------------------------------------------------------------------------------------------------|----------------------------------------------------------------| +❓ Unknown | The impact of this data is highly variable or unknown | N/A | +➖ Low | This data will provide minimal access or mostly public information | Non-stored XSS, Limited scope + read-only API access | +➕ Moderate | This data will provide some access or information | Stored XSS in some cases, read-only or limited write API access| +⚠️ High | This data will provide single-user access or secret information | Usernames/passwords, OAuth tokens | +❗️ Critical | This data will provide complete control, access to several users, or confidential/personal information | Credential database dumps, AWS keys diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..8476c21 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,34 @@ +name: Docker Build & Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + uses: 
docker/build-push-action@v5 + with: + context: . + load: true + tags: github-dorks:test + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Test Docker image + run: | + # Smoke-test the image by running it with the version flag + docker run github-dorks:test -v + + - name: Verify image size + run: docker image ls github-dorks:test diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..038f261 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,28 @@ +name: Lint (flake8) + +on: + push: + branches: [ "**" ] + pull_request: + branches: [ "**" ] + +jobs: + flake8: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.8' + + - name: Install flake8 + run: | + python -m pip install --upgrade pip + pip install flake8 + + - name: Run flake8 + run: | + flake8 . diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0d8ecb9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Use Python 3.8 as base - this version has good compatibility with older packages +FROM python:3.8-slim + +# Set working directory +WORKDIR /app + +# Install git (needed for pip install from git repos) +RUN apt-get update && \ + apt-get install -y git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Copy only the necessary files +COPY github-dork.py /app/ +COPY github-dorks.txt /app/ +COPY setup.py /app/ +COPY README.md /app/ +COPY requirements.txt /app/ + +# Install dependencies +# Using the specific version of github3.py that's known to work +RUN pip install --no-cache-dir github3.py==1.0.0a2 feedparser==6.0.2 + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 + +# Create volume for potential output files +VOLUME ["/app/output"] + +ENTRYPOINT ["python", "github-dork.py"] \ No newline at end of file diff --git a/README.md b/README.md index 51fd741..eb36e4f 100644 --- 
a/README.md +++ b/README.md @@ -1,52 +1,79 @@ +[![Docker Build & Test](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml/badge.svg)](https://github.com/techgaun/github-dorks/actions/workflows/docker-build.yml) + # Github Dorks -[Github search](https://github.com/search) is quite powerful and useful feature and can be used to search sensitive data on the repositories. Collection of github dorks that can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is supposed to be useful for assessing security and performing pen-testing of systems. -### GitHub Dork Search Tool -[github-dork.py](github-dork.py) is a simple python tool that can search through your repository or your organization/user repositories. Its not a perfect tool at the moment but provides a basic functionality to automate the search on your repositories against the dorks specified in text file. +[Github Search](https://github.com/search) is quite a powerful and useful feature that can be used to search for sensitive data on repositories. This is a collection of Github dorks that can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is supposed to be useful for assessing security and performing pen-testing of systems. + +## GitHub Dork Search Tool + +[github-dork.py](github-dork.py) is a simple Python tool that can search through your repository or your organization/user repositories. It's not a perfect tool at the moment but provides basic functionality to automate the search on your repositories against the dorks specified in the text file. + +### Installation -#### Installation This tool uses [github3.py](https://github.com/sigmavirus24/github3.py) to talk with GitHub Search API. Clone this repository and run: + +```shell +pip install . 
+``` + +### Docker Installation + +You can also run github-dorks using Docker for a consistent environment: + ```shell -pip install -r requirements.txt +# Build the Docker image +docker build -t github-dorks . + +# Run with a GitHub token (recommended) +docker run -e GH_TOKEN=your_github_token github-dorks -u someuser + +# Run with username/password +docker run -e GH_USER=your_username -e GH_PWD=your_password github-dorks -u someuser + +# Save results to a CSV file +docker run -v $(pwd)/output:/app/output -e GH_TOKEN=your_github_token github-dorks -u someuser -o /app/output/results.csv ``` -#### Usage +### Usage + ``` -GH_USER - Environment variable to specify github user -GH_PWD - Environment variable to specify password -GH_TOKEN - Environment variable to specify github token +GH_USER - Environment variable to specify Github user +GH_PWD - Environment variable to specify a password +GH_TOKEN - Environment variable to specify Github token GH_URL - Environment variable to specify GitHub Enterprise base URL ``` Some example usages are listed below: ```shell -python github-dork.py -r techgaun/github-dorks # search single repo +github-dork.py -r techgaun/github-dorks # search a single repo -python github-dork.py -u techgaun # search all repos of user +github-dork.py -u techgaun # search all repos of a user -python github-dork.py -u dev-nepal # search all repos of an organization +github-dork.py -u dev-nepal # search all repos of an organization -GH_USER=techgaun GH_PWD= python github-dork.py -u dev-nepal # search as authenticated user +GH_USER=techgaun GH_PWD= github-dork.py -u dev-nepal # search as authenticated user -GH_TOKEN= python github-dork.py -u dev-nepal # search using auth token +GH_TOKEN= github-dork.py -u dev-nepal # search using auth token -GH_URL=https://github.example.com python github-dork.py -u dev-nepal # search a GitHub Enterprise instance +GH_URL=https://github.example.com github-dork.py -u dev-nepal # search a GitHub Enterprise instance ``` -#### 
Limitations +### Limitations - Authenticated requests get a higher rate limit. But, since this tool waits for the api rate limit to be reset (which is usually less than a minute), it can be slightly slow. - Output formatting is not great. PR welcome - ~~Handle rate limit and retry. PR welcome~~ ### Contribution -Please consider contributing the dorks that can reveal potentially sensitive information in github. + +Please consider contributing dorks that can reveal potentially sensitive information on Github. ### List of Dorks -I am not categorizing at the moment. Instead I am going to just the list of dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). + +I am not categorizing at the moment. Instead, I am just going to list the dorks with a description. Many of the dorks can be modified to make the search more specific or generic. You can see more options [here](https://github.com/search#search_cheatsheet_pane). Dork | Description ------------------------------------------------|-------------------------------------------------------------------------- @@ -108,8 +135,8 @@ path:sites databases password | Drupal website database creden shodan_api_key language:python | Shodan API keys (try other languages too) filename:shadow path:etc | Contains encrypted passwords and account information of new unix systems filename:passwd path:etc | Contains user account information including encrypted passwords of traditional unix systems -extension:avastlic | Contains license keys for Avast! Antivirus -extension:dbeaver-data-sources.xml | DBeaver config containing MySQL Credentials +extension:avastlic "support.avast.com" | Contains license keys for Avast! 
Antivirus +filename:dbeaver-data-sources.xml | DBeaver config containing MySQL Credentials filename:.esmtprc password | esmtp configuration extension:json googleusercontent client_secret | OAuth credentials for accessing Google APIs HOMEBREW_GITHUB_API_TOKEN language:shell | Github token usually set by homebrew users @@ -118,3 +145,19 @@ xoxp OR xoxb | Slack bot and private tokens filename:logins.json | Firefox saved password collection (key3.db usually in same repo) filename:CCCam.cfg | CCCam Server config file msg nickserv identify filename:config | Possible IRC login passwords +filename:settings.py SECRET_KEY | Django secret keys (usually allows for session hijacking, RCE, etc) +filename:secrets.yml password | Usernames/passwords, Rails applications +filename:master.key path:config | Rails master key (used for decrypting `credentials.yml.enc` for Rails 5.2+) +filename:deployment-config.json | Created by sftp-deployment for Atom, contains server details and credentials +filename:.ftpconfig | Created by remote-ssh for Atom, contains SFTP/SSH server details and credentials +filename:.remote-sync.json | Created by remote-sync for Atom, contains FTP and/or SCP/SFTP/SSH server details and credentials +filename:sftp.json path:.vscode | Created by vscode-sftp for VSCode, contains SFTP/SSH server details and credentials +filename:sftp-config.json | Created by SFTP for Sublime Text, contains FTP/FTPS or SFTP/SSH server details and credentials +filename:WebServers.xml | Created by Jetbrains IDEs, contains webserver credentials with encoded passwords ([not encrypted!](https://intellij-support.jetbrains.com/hc/en-us/community/posts/207074025/comments/207034775)) +"api_hash" "api_id" | Telegram API token +"https://hooks.slack.com/services/" | Slack services URLs often have a secret API token as a suffix +filename:github-recovery-codes.txt | GitHub recovery key +filename:gitlab-recovery-codes.txt | GitLab recovery key +filename:discord_backup_codes.txt | Discord recovery key 
+extension:yaml cloud.redislabs.com | Redis credentials provided by Redis Labs found in a YAML file +extension:json cloud.redislabs.com | Redis credentials provided by Redis Labs found in a JSON file diff --git a/github-dork.py b/github-dork.py index 95ce286..2941b04 100644 --- a/github-dork.py +++ b/github-dork.py @@ -7,7 +7,7 @@ import time import feedparser from copy import copy -from sys import stderr +from sys import stderr, prefix gh_user = os.getenv('GH_USER', None) gh_pass = os.getenv('GH_PWD', None) @@ -27,8 +27,8 @@ def search_wrapper(gen): try: yield next(gen) except StopIteration: - raise - except github.exceptions.ForbiddenError as e: + return + except github.exceptions.ForbiddenError: search_rate_limit = gh.rate_limit()['resources']['search'] # limit_remaining = search_rate_limit['remaining'] reset_time = search_rate_limit['reset'] @@ -47,9 +47,10 @@ def metasearch(repo_to_search=None, user_to_search=None, gh_dorks_file=None, active_monit=None, + output_filename=None, refresh_time=60): if active_monit is None: - search(repo_to_search, user_to_search, gh_dorks_file, active_monit) + search(repo_to_search, user_to_search, gh_dorks_file, active_monit, output_filename) else: monit(gh_dorks_file, active_monit, refresh_time) @@ -82,9 +83,16 @@ def monit(gh_dorks_file=None, active_monit=None, refresh_time=60): def search(repo_to_search=None, user_to_search=None, gh_dorks_file=None, - active_monit=None): + active_monit=None, + output_filename=None): + if gh_dorks_file is None: - gh_dorks_file = 'github-dorks.txt' + for path_prefix in ['.', os.path.join(prefix, 'github-dorks/')]: + filename = os.path.join(path_prefix, 'github-dorks.txt') + if os.path.isfile(filename): + gh_dorks_file = filename + break + if not os.path.isfile(gh_dorks_file): raise Exception('Error, the dorks file path is not valid') if user_to_search: @@ -92,7 +100,15 @@ def search(repo_to_search=None, if repo_to_search: print("Scanning Repo: ", repo_to_search) found = False + + outputFile = 
None + if output_filename: + outputFile = open(output_filename, 'w') + with open(gh_dorks_file, 'r') as dork_file: + # Write CSV Header + if outputFile: + outputFile.write('Issue Type (Dork), Text Matches, File Path, Score/Relevance, URL of File\n') for dork in dork_file: dork = dork.strip() if not dork or dork[0] in '#;': @@ -115,12 +131,18 @@ def search(repo_to_search=None, 'score': search_result.score, 'url': search_result.html_url } - result = '\n'.join([ - 'Found result for {dork}', - 'Text matches: {text_matches}', 'File path: {path}', - 'Score/Relevance: {score}', 'URL of File: {url}', '' - ]).format(**fmt_args) - print(result) + + # Either write to file or print output + if outputFile: + outputFile.write('{dork}, {text_matches}, {path}, {score}, {url}\n'.format(**fmt_args)) + else: + result = '\n'.join([ + 'Found result for {dork}', + 'Text matches: {text_matches}', 'File path: {path}', + 'Score/Relevance: {score}', 'URL of File: {url}', '' + ]).format(**fmt_args) + print(result) + except github.exceptions.GitHubError as e: print('GitHubError encountered on search of dork: ' + dork) print(e) @@ -171,12 +193,21 @@ def main(): help='Monitors Github user private feed with feed token' ) + parser.add_argument( + '-o', + '--outputFile', + dest='output_filename', + action='store', + help='CSV File to write results to. This overwrites the file provided! 
Eg: out.csv' + ) + args = parser.parse_args() metasearch( repo_to_search=args.repo_to_search, user_to_search=args.user_to_search, gh_dorks_file=args.gh_dorks_file, - active_monit=args.active_monit) + active_monit=args.active_monit, + output_filename=args.output_filename) if __name__ == '__main__': diff --git a/github-dorks.txt b/github-dorks.txt index 02a8a7f..c5625a6 100644 --- a/github-dorks.txt +++ b/github-dorks.txt @@ -60,8 +60,8 @@ shodan_api_key language:json shodan_api_key language:ruby filename:shadow path:etc filename:passwd path:etc -extension:avastlic -extension:dbeaver-data-sources.xml +extension:avastlic "support.avast.com" +filename:dbeaver-data-sources.xml filename:sftp-config.json filename:.esmtprc password extension:json googleusercontent client_secret @@ -71,3 +71,20 @@ xoxp OR xoxb filename:logins.json filename:CCCam.cfg msg nickserv identify filename:config +filename:settings.py SECRET_KEY +filename:secrets.yml password +filename:master.key path:config +filename:deployment-config.json +filename:.ftpconfig +filename:.remote-sync.json +filename:sftp.json path:.vscode +filename:WebServers.xml +filename:jupyter_notebook_config.json +"api_hash" "api_id" +"https://hooks.slack.com/services/" +filename:github-recovery-codes.txt +filename:gitlab-recovery-codes.txt +filename:discord_backup_codes.txt +extension:yaml cloud.redislabs.com +extension:json cloud.redislabs.com +DATADOG_API_KEY language:shell diff --git a/requirements.txt b/requirements.txt index 9b4e16f..cfe346b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ github3.py==1.0.0a2 -feedparser==5.1.3 +feedparser==6.0.2 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..979e932 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + +with open('README.md', 'r') as f: + long_description = f.read() + +setup( + name='github-dorks', + version='0.1', + description='Find leaked secrets via github search.', + license='Apache License 2.0', + 
long_description=long_description, + author='Samar Dhwoj Acharya (@techgaun)', + long_description_content_type='text/markdown', + scripts=['github-dork.py'], + data_files=[('github-dorks', ['github-dorks.txt'])], + install_requires=[ + 'github3.py==4.0.1', + 'feedparser==6.0.2', + ], +)