In software development, managing code clarity and maintainability is critical. The Python Comment Manager Script is a versatile tool designed to analyze and process Python source files by either counting comments or removing them entirely. This article explores the script’s significance, its inner workings, and why it is an essential utility for developers.
The script is modular, with distinct functions for tokenization, comment processing, and user interaction. Below is a breakdown of its key components:
Python Comment Manager Script Code
import io
import os
import sys
import tokenize

# — _tokenize_file function (Should be the corrected version from the previous steps) —
def _tokenize_file(filepath):
    """Read a Python source file and return a token generator for it.

    The file is read fully in binary mode (so the handle is closed before
    tokenization starts), verified to be valid UTF-8, and then fed to
    ``tokenize.tokenize`` through an in-memory ``io.BytesIO``.

    Args:
        filepath (str): Path to the source file to tokenize.

    Returns:
        The token iterator produced by ``tokenize.tokenize``, or ``None``
        when the file cannot be read, is not valid UTF-8, or the tokenizer
        cannot be set up. Every failure is reported on ``sys.stderr``.

    Note:
        Tokens are produced lazily, so errors in the source text itself can
        still be raised while the caller iterates over the result; callers
        must guard that iteration too.
    """
    # Step 1: read the raw bytes.
    try:
        with open(filepath, 'rb') as source_file:
            source_bytes = source_file.read()
    except FileNotFoundError:
        print(f"\nError: Input file not found at '{filepath}'", file=sys.stderr)
        return None
    except OSError as e:
        print(f"\nError reading file '{filepath}': {e}", file=sys.stderr)
        return None
    except Exception as e:
        # e.g. a non-path argument; keep the "never raise" contract.
        print(f"\nAn unexpected error occurred while reading/preparing '{filepath}': {e}", file=sys.stderr)
        return None

    # Step 2: reject anything that is not valid UTF-8 (result is discarded,
    # the decode is only a validity check).
    try:
        source_bytes.decode('utf-8')
    except UnicodeDecodeError as e:
        print(f"\nError: Could not decode file '{filepath}' as UTF-8: {e}", file=sys.stderr)
        print("Ensure the file is saved with UTF-8 encoding.", file=sys.stderr)
        return None

    # Step 3: build the tokenizer over an in-memory copy of the bytes.
    try:
        return tokenize.tokenize(io.BytesIO(source_bytes).readline)
    except tokenize.TokenError as e:
        print(f"\nError during initial tokenization of '{filepath}': {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"\nAn unexpected error occurred while reading/preparing '{filepath}': {e}", file=sys.stderr)
        return None
# — remove_comments function (also strips the trailing whitespace left behind by removed comments) —
def _strip_comment_tokens(tokens):
    """Rebuild source text from *tokens* with every comment removed.

    Lines that held nothing but a comment are dropped entirely; lines that
    had a trailing comment lose the padding that was left in front of it.
    All other lines keep their content untouched (only the line terminator
    is normalized to ``\\n``), so whitespace inside multi-line string
    literals is preserved.

    Args:
        tokens: Iterable of ``tokenize.TokenInfo`` items for one file.

    Returns:
        str: The reconstructed source code without comments.
    """
    # Token types that carry layout only and never count as "code on a row".
    layout_types = {
        tokenize.ENCODING, tokenize.NEWLINE, tokenize.NL,
        tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER,
    }
    kept_tokens = []
    comment_rows = set()   # rows (1-based) on which a comment was removed
    code_rows = set()      # rows covered by at least one real code token
    for tok in tokens:
        if tok.type == tokenize.COMMENT:
            comment_rows.add(tok.start[0])
            continue
        kept_tokens.append(tok)
        if tok.type not in layout_types:
            # Multi-line tokens (e.g. triple-quoted strings) cover a row range.
            code_rows.update(range(tok.start[0], tok.end[0] + 1))

    rebuilt = tokenize.untokenize(kept_tokens)
    if isinstance(rebuilt, bytes):
        # untokenize returns bytes when the stream starts with ENCODING.
        rebuilt = rebuilt.decode('utf-8')

    # With full position tuples untokenize keeps every token on its original
    # row, so output line N corresponds to source row N.
    comment_only_rows = comment_rows - code_rows
    cleaned_lines = []
    # StringIO with its default newline setting splits on '\n' only, which
    # matches how the rows were counted.
    with io.StringIO(rebuilt) as stream:
        for row, line in enumerate(stream, start=1):
            if row in comment_only_rows:
                continue  # the whole line was just a comment
            if line.endswith('\r\n'):
                text, eol = line[:-2], '\n'
            elif line.endswith('\n'):
                text, eol = line[:-1], '\n'
            else:
                text, eol = line, ''  # last line without a final newline
            if row in comment_rows:
                # Nothing can follow a comment on its row, so this only
                # removes the padding left where the comment used to be.
                text = text.rstrip()
            cleaned_lines.append(text + eol)
    return "".join(cleaned_lines)


def remove_comments(input_filepath, output_filepath):
    """Write a copy of a Python file with all comments removed.

    Comment-only lines are dropped, trailing comments are cut together with
    the whitespace in front of them, and the result is written as UTF-8.
    Missing parent directories of the output path are created. Shebang and
    encoding-declaration lines are ordinary comments to the tokenizer and are
    therefore removed as well.

    Args:
        input_filepath (str): Path to the input .py file.
        output_filepath (str): Path to the output .py file.

    Returns:
        bool: True if successful, False otherwise (the reason is printed to
        ``sys.stderr``).
    """
    print(f"\nProcessing '{os.path.basename(input_filepath)}' to remove comments…")
    token_generator = _tokenize_file(input_filepath)
    if token_generator is None:
        return False

    # Tokenization is lazy, so source errors surface while rebuilding.
    try:
        final_code = _strip_comment_tokens(token_generator)
    except tokenize.TokenError as e:
        print(f"Error tokenizing/untokenizing file '{input_filepath}': {e}", file=sys.stderr)
        return False
    except UnicodeDecodeError as e:
        print(f"Error decoding processed data from '{input_filepath}' back to UTF-8: {e}", file=sys.stderr)
        return False
    except Exception as e:
        print(f"An unexpected error occurred during token processing/reconstruction: {e}", file=sys.stderr)
        return False

    # Write the final, cleaned code.
    try:
        output_dir = os.path.dirname(output_filepath)
        if output_dir and not os.path.exists(output_dir):
            print(f"Creating output directory: {output_dir}")
            # Separate handler: IOError is an alias of OSError, so a shared
            # try block could never tell directory errors from write errors.
            try:
                os.makedirs(output_dir, exist_ok=True)
            except OSError as e:
                print(f"\nError creating output directory for '{output_filepath}': {e}", file=sys.stderr)
                return False
        with open(output_filepath, 'w', encoding='utf-8') as output_file:
            output_file.write(final_code)
    except OSError as e:
        print(f"\nError writing output file '{output_filepath}': {e}", file=sys.stderr)
        return False
    except Exception as e:
        print(f"\nAn unexpected error occurred while writing '{output_filepath}': {e}", file=sys.stderr)
        return False

    print("\nSuccessfully removed comments and cleaned trailing spaces.")
    print(f"Output saved to '{output_filepath}'")
    return True
# — count_comments function (returns None when the file cannot be tokenized) —
def count_comments(input_filepath):
    """Count the comment tokens in a Python source file.

    Args:
        input_filepath (str): Path to the file to analyze.

    Returns:
        int | None: Number of ``tokenize.COMMENT`` tokens found, or ``None``
        if the file could not be tokenized (the reason is printed to
        ``sys.stderr``).
    """
    print(f"\nAnalyzing '{os.path.basename(input_filepath)}' for comments…")
    token_generator = _tokenize_file(input_filepath)
    if token_generator is None:
        return None

    # The generator is lazy, so tokenization errors are raised here.
    try:
        return sum(
            1 for token_info in token_generator
            if token_info.type == tokenize.COMMENT
        )
    except tokenize.TokenError as e:
        print(f"Error processing tokens in '{input_filepath}': {e}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"An unexpected error occurred during token processing: {e}", file=sys.stderr)
        return None
# — run_interactive function (prompts for an action and file paths, then runs it) —
def _prompt_action():
    """Ask which action to run; return '1', '2' or 'Q', or None if aborted."""
    while True:
        print("\nChoose an action:")
        print(" 1: Count comments in a file")
        print(" 2: Remove comments from a file")
        print(" Q: Quit")
        try:
            choice = input("Enter your choice (1, 2, or Q): ").strip().upper()
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C / closed stdin at the first prompt is a normal cancel.
            print("\nOperation cancelled by user.")
            return None
        if choice in ('1', '2', 'Q'):
            return choice
        print("Invalid choice. Please enter 1, 2, or Q.")


def _prompt_input_path():
    """Ask for an existing input file; return its path, or None if aborted."""
    while True:
        try:
            candidate = input("Enter the full path to the input Python file (.py): ").strip()
            if not candidate:
                print("File path cannot be empty. Please try again.")
                continue
            if not os.path.isfile(candidate):
                print(f"Error: File not found at '{candidate}'. Please check the path and try again.")
                continue
            if not candidate.lower().endswith(('.py', '.pyw')):
                print(f"Warning: File '{os.path.basename(candidate)}' does not have a typical Python extension (.py, .pyw).")
                proceed = input("Continue anyway? (y/n): ").strip().lower()
                if proceed != 'y':
                    continue  # ask for another path
            return candidate
        except KeyboardInterrupt:
            print("\nOperation cancelled by user.")
            return None
        except Exception as e:
            print(f"An error occurred during input: {e}")
            return None


def _prompt_output_path(input_filepath):
    """Ask where to write the result; return the path, or None if aborted.

    A blank answer selects ``<input>_no_comments<ext>`` next to the input.
    Paths that resolve to the input file are rejected, and an existing file
    is only accepted after an explicit overwrite confirmation.
    """
    base, ext = os.path.splitext(input_filepath)
    if not ext:
        ext = '.py'
    default_output_name = f"{base}_no_comments{ext}"
    prompt = (
        f"Enter the full path for the output file "
        f"(leave blank to use default: '{os.path.basename(default_output_name)}'): "
    )
    # realpath + normcase also catches symlinks and case-insensitive paths.
    resolved_input = os.path.normcase(os.path.realpath(input_filepath))
    while True:
        try:
            candidate = input(prompt).strip()
            if not candidate:
                candidate = default_output_name
                print(f"Using default output path: '{candidate}'")
            if os.path.normcase(os.path.realpath(candidate)) == resolved_input:
                print("\nError: Output file path cannot be the same as the input file path.")
                print("Please enter a different path or leave blank for the default.")
                continue
            if os.path.exists(candidate):
                overwrite = input(f"Warning: Output file '{candidate}' already exists. Overwrite? (y/n): ").strip().lower()
                if overwrite != 'y':
                    print("Please enter a different output file path.")
                    continue
            return candidate
        except KeyboardInterrupt:
            print("\nOperation cancelled by user.")
            return None


def run_interactive():
    """Handles the interactive user prompts and calls the appropriate functions.

    Flow: choose an action (count / remove / quit), choose the input file,
    and for removal also choose the output file. Cancelling any prompt with
    Ctrl+C ends the session without running the action.
    """
    print("————————————")
    print(" Python Comment Manager Script ")
    print("————————————")

    # 1. Get desired action
    action_choice = _prompt_action()
    if action_choice is None:
        return
    if action_choice == 'Q':
        print("Exiting.")
        return

    # 2. Get input file path
    input_filepath = _prompt_input_path()
    if input_filepath is None:
        return

    # 3. Perform the action
    try:
        if action_choice == '1':  # Count Comments
            comment_count = count_comments(input_filepath)
            if comment_count is not None:
                print("\n————————————")
                print(f"Result: Found {comment_count} comment tokens in '{os.path.basename(input_filepath)}'.")
                print("————————————")
        elif action_choice == '2':  # Remove Comments
            output_filepath = _prompt_output_path(input_filepath)
            if output_filepath is None:
                return
            remove_comments(input_filepath, output_filepath)
    except KeyboardInterrupt:
        print("\nOperation cancelled by user.")
    except Exception as e:
        print(f"\nAn unexpected critical error occurred: {e}", file=sys.stderr)
    print("\nOperation finished.")
# — Script Execution —
# Start the interactive session only when executed as a script, not on import.
if __name__ == "__main__":
    run_interactive()